Propagate the frame where the resource var was created to resource var handles
Allows showing informative error messages, indicating where the resource var was created. PiperOrigin-RevId: 353729467 Change-Id: I90c19bfecc2d2df19523b28a0caeea2abe331fae
This commit is contained in:
parent
ba088e0369
commit
1b0d50b051
tensorflow
compiler/jit
core
framework
kernels
platform
util
python
@ -89,9 +89,25 @@ Status GetVariableInfosFromInputs(ResourceMgr* rm, DeviceBase* dev,
|
||||
Var* variable = nullptr;
|
||||
ResourceHandle handle = inputs[var_idx]->flat<ResourceHandle>()(0);
|
||||
if (handle.device() != dev->attributes().name()) {
|
||||
return errors::InvalidArgument(
|
||||
"Trying to access resource ", handle.name(), " located in device ",
|
||||
handle.device(), " from device ", dev->attributes().name());
|
||||
std::string definition_location = [&]() -> std::string {
|
||||
if (handle.definition_stack_trace()) {
|
||||
std::vector<StackFrame> stack_frames =
|
||||
handle.definition_stack_trace()->ToStackFrames(
|
||||
{}, IsInternalFrameForFilename,
|
||||
/*reverse_traversal=*/true,
|
||||
/*limit=*/1);
|
||||
if (!stack_frames.empty()) {
|
||||
const StackFrame& last_frame = stack_frames[0];
|
||||
return absl::StrCat(" (defined @ ", last_frame.file_name, ":",
|
||||
last_frame.line_number, ")");
|
||||
}
|
||||
}
|
||||
return "";
|
||||
}();
|
||||
return errors::InvalidArgument("Trying to access resource ",
|
||||
handle.name(), definition_location,
|
||||
" located in device ", handle.device(),
|
||||
" from device ", dev->attributes().name());
|
||||
}
|
||||
TF_RETURN_IF_ERROR(rm->LookupOrCreate<Var>(
|
||||
handle.container(), handle.name(), &variable, [](Var** ptr) {
|
||||
|
@ -706,6 +706,7 @@ cc_library(
|
||||
"//tensorflow/core/lib/strings:strcat",
|
||||
"//tensorflow/core/platform:tensor_coding",
|
||||
"//tensorflow/core/platform:types",
|
||||
"//tensorflow/core/util:abstract_stack_trace",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
||||
@ -801,6 +802,7 @@ tf_cuda_library(
|
||||
"//tensorflow/core/platform:tensor_coding",
|
||||
"//tensorflow/core/platform:types",
|
||||
"//tensorflow/core/public:version",
|
||||
"//tensorflow/core/util:abstract_stack_trace",
|
||||
"//third_party/eigen3",
|
||||
"@com_google_absl//absl/memory",
|
||||
"@com_google_absl//absl/strings",
|
||||
|
@ -20,6 +20,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/framework/types.pb.h"
|
||||
#include "tensorflow/core/platform/tensor_coding.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
#include "tensorflow/core/util/managed_stack_trace.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
@ -71,6 +72,15 @@ class ResourceHandle {
|
||||
dtypes_and_shapes_ = dtypes_and_shapes;
|
||||
}
|
||||
|
||||
void set_definition_stack_trace(
|
||||
const absl::optional<ManagedStackTrace>& definition_stack_trace) {
|
||||
definition_stack_trace_ = definition_stack_trace;
|
||||
}
|
||||
|
||||
const absl::optional<ManagedStackTrace>& definition_stack_trace() const {
|
||||
return definition_stack_trace_;
|
||||
}
|
||||
|
||||
// Conversion to and from ResourceHandleProto
|
||||
void AsProto(ResourceHandleProto* proto) const;
|
||||
void FromProto(const ResourceHandleProto& proto);
|
||||
@ -93,6 +103,7 @@ class ResourceHandle {
|
||||
uint64 hash_code_ = 0;
|
||||
std::string maybe_type_name_;
|
||||
std::vector<DtypeAndPartialTensorShape> dtypes_and_shapes_;
|
||||
absl::optional<ManagedStackTrace> definition_stack_trace_;
|
||||
};
|
||||
|
||||
// For backwards compatibility for when this was a proto
|
||||
|
@ -36,10 +36,12 @@ static std::atomic<int64> current_id_;
|
||||
ResourceHandle MakeResourceHandle(
|
||||
const string& container, const string& name, const DeviceBase& device,
|
||||
const TypeIndex& type_index,
|
||||
const std::vector<DtypeAndPartialTensorShape>& dtypes_and_shapes) {
|
||||
const std::vector<DtypeAndPartialTensorShape>& dtypes_and_shapes,
|
||||
const absl::optional<ManagedStackTrace>& definition_stack_trace) {
|
||||
ResourceHandle result;
|
||||
result.set_device(device.name());
|
||||
result.set_container(container);
|
||||
result.set_definition_stack_trace(definition_stack_trace);
|
||||
if (name == ResourceHandle::ANONYMOUS_NAME) {
|
||||
result.set_name(strings::StrCat("_AnonymousVar", current_id_.fetch_add(1)));
|
||||
} else {
|
||||
|
@ -296,28 +296,33 @@ class ResourceMgr {
|
||||
ResourceHandle MakeResourceHandle(
|
||||
const std::string& container, const std::string& name,
|
||||
const DeviceBase& device, const TypeIndex& type_index,
|
||||
const std::vector<DtypeAndPartialTensorShape>& dtypes_and_shapes = {})
|
||||
const std::vector<DtypeAndPartialTensorShape>& dtypes_and_shapes = {},
|
||||
const absl::optional<ManagedStackTrace>& definition_stack_trace = {})
|
||||
TF_MUST_USE_RESULT;
|
||||
|
||||
template <typename T>
|
||||
ResourceHandle MakeResourceHandle(
|
||||
OpKernelContext* ctx, const std::string& container, const std::string& name,
|
||||
const std::vector<DtypeAndPartialTensorShape>& dtypes_and_shapes = {}) {
|
||||
return MakeResourceHandle(
|
||||
container.empty() ? ctx->resource_manager()->default_container()
|
||||
: container,
|
||||
name, *ctx->device(), TypeIndex::Make<T>(), dtypes_and_shapes);
|
||||
const std::vector<DtypeAndPartialTensorShape>& dtypes_and_shapes = {},
|
||||
const absl::optional<ManagedStackTrace>& definition_stack_trace = {}) {
|
||||
return MakeResourceHandle(container.empty()
|
||||
? ctx->resource_manager()->default_container()
|
||||
: container,
|
||||
name, *ctx->device(), TypeIndex::Make<T>(),
|
||||
dtypes_and_shapes, definition_stack_trace);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
ResourceHandle MakeResourceHandle(
|
||||
OpKernelConstruction* ctx, const std::string& container,
|
||||
const std::string& name,
|
||||
const std::vector<DtypeAndPartialTensorShape>& dtypes_and_shapes = {}) {
|
||||
return MakeResourceHandle(
|
||||
container.empty() ? ctx->resource_manager()->default_container()
|
||||
: container,
|
||||
name, *ctx->device(), TypeIndex::Make<T>(), dtypes_and_shapes);
|
||||
const std::vector<DtypeAndPartialTensorShape>& dtypes_and_shapes = {},
|
||||
const absl::optional<ManagedStackTrace>& definition_stack_trace = {}) {
|
||||
return MakeResourceHandle(container.empty()
|
||||
? ctx->resource_manager()->default_container()
|
||||
: container,
|
||||
name, *ctx->device(), TypeIndex::Make<T>(),
|
||||
dtypes_and_shapes, definition_stack_trace);
|
||||
}
|
||||
|
||||
Status MakeResourceHandleToOutput(OpKernelContext* context, int output_index,
|
||||
|
@ -245,7 +245,8 @@ void VarHandleOp::Compute(OpKernelContext* ctx) {
|
||||
ctx, ctx->allocate_temp(DT_RESOURCE, TensorShape({}), &handle, attr));
|
||||
handle.scalar<ResourceHandle>()() = MakeResourceHandle<Var>(
|
||||
ctx, container_, name_,
|
||||
std::vector<DtypeAndPartialTensorShape>{dtype_and_shape_});
|
||||
std::vector<DtypeAndPartialTensorShape>{dtype_and_shape_},
|
||||
ctx->stack_trace());
|
||||
ctx->set_output(0, handle);
|
||||
} else {
|
||||
ctx->set_output(0, resource_);
|
||||
|
@ -633,6 +633,11 @@ cc_library(
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "stack_frame",
|
||||
hdrs = ["stack_frame.h"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "status",
|
||||
srcs = ["status.cc"],
|
||||
@ -641,6 +646,7 @@ cc_library(
|
||||
":logging",
|
||||
":macros",
|
||||
":mutex",
|
||||
":stack_frame",
|
||||
":stacktrace",
|
||||
":str_util",
|
||||
":strcat",
|
||||
@ -1385,6 +1391,7 @@ filegroup(
|
||||
"ram_file_system.h",
|
||||
"random.h",
|
||||
"resource.h",
|
||||
"stack_frame.h",
|
||||
"stacktrace.h",
|
||||
"stacktrace_handler.h",
|
||||
"status.h",
|
||||
@ -1633,6 +1640,7 @@ filegroup(
|
||||
"snappy.h",
|
||||
"stacktrace.h",
|
||||
"status.cc",
|
||||
"stack_frame.h",
|
||||
"status.h",
|
||||
"str_util.cc",
|
||||
"str_util.h",
|
||||
|
39
tensorflow/core/platform/stack_frame.h
Normal file
39
tensorflow/core/platform/stack_frame.h
Normal file
@ -0,0 +1,39 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_CORE_PLATFORM_STACK_TRACE_H_
|
||||
#define TENSORFLOW_CORE_PLATFORM_STACK_TRACE_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
// A struct representing a frame in a stack trace.
|
||||
struct StackFrame {
|
||||
std::string file_name;
|
||||
int line_number;
|
||||
std::string function_name;
|
||||
|
||||
bool operator==(const StackFrame& other) const {
|
||||
return line_number == other.line_number &&
|
||||
function_name == other.function_name && file_name == other.file_name;
|
||||
}
|
||||
|
||||
bool operator!=(const StackFrame& other) const { return !(*this == other); }
|
||||
};
|
||||
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // TENSORFLOW_CORE_PLATFORM_STACK_TRACE_H_
|
@ -24,26 +24,13 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
#include "tensorflow/core/platform/macros.h"
|
||||
#include "tensorflow/core/platform/stack_frame.h"
|
||||
#include "tensorflow/core/platform/stringpiece.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
#include "tensorflow/core/protobuf/error_codes.pb.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
// A struct representing a frame in a stack trace.
|
||||
struct StackFrame {
|
||||
std::string file_name;
|
||||
int line_number;
|
||||
std::string function_name;
|
||||
|
||||
bool operator==(const StackFrame& other) const {
|
||||
return line_number == other.line_number &&
|
||||
function_name == other.function_name && file_name == other.file_name;
|
||||
}
|
||||
|
||||
bool operator!=(const StackFrame& other) const { return !(*this == other); }
|
||||
};
|
||||
|
||||
#if defined(__clang__)
|
||||
// Only clang supports warn_unused_result as a type annotation.
|
||||
class TF_MUST_USE_RESULT Status;
|
||||
|
@ -474,14 +474,16 @@ cc_library(
|
||||
"//tensorflow/c/eager:__pkg__",
|
||||
"//tensorflow/core:__pkg__",
|
||||
"//tensorflow/core/common_runtime/eager:__pkg__",
|
||||
"//tensorflow/core/framework:__pkg__",
|
||||
"//tensorflow/core/platform:__pkg__",
|
||||
"//tensorflow/python:__pkg__",
|
||||
"//tensorflow/python/eager:__pkg__",
|
||||
"//tensorflow/python/util:__pkg__",
|
||||
],
|
||||
deps = [
|
||||
"//tensorflow/core/platform:status",
|
||||
"//tensorflow/core/platform:stack_frame",
|
||||
"@com_google_absl//absl/strings",
|
||||
"@com_google_absl//absl/types:optional",
|
||||
],
|
||||
)
|
||||
|
||||
|
@ -17,9 +17,11 @@ limitations under the License.
|
||||
#define TENSORFLOW_CORE_UTIL_ABSTRACT_STACK_TRACE_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/strings/match.h"
|
||||
#include "tensorflow/core/platform/status.h"
|
||||
#include "absl/types/optional.h"
|
||||
#include "tensorflow/core/platform/stack_frame.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
|
@ -944,6 +944,26 @@ class DefFunctionTest(xla_test.XLATestCase):
|
||||
self.assertEqual(cell_nojit.value(), orig_nojit + 2)
|
||||
self.assertEqual(cell_jit.value(), orig_jit + 3)
|
||||
|
||||
@test_util.disable_mlir_bridge('TODO(b/162272821): MLIR bridge returns '
|
||||
' wrong status type')
|
||||
def testResourceWrongDevice(self):
|
||||
if 'gpu' not in self.device.lower():
|
||||
self.skipTest('Need a GPU to have non-trivial device placement')
|
||||
|
||||
with ops.device('device:CPU:0'):
|
||||
v = variables.Variable([3.1, 3.2])
|
||||
|
||||
with ops.device('device:{}:0'.format(self.device)):
|
||||
|
||||
@def_function.function(experimental_compile=True)
|
||||
def update_var(a):
|
||||
v.assign_add(a)
|
||||
|
||||
arg = random_ops.random_normal([2])
|
||||
with self.assertRaisesRegex(errors.InvalidArgumentError,
|
||||
'def_function_xla_jit_test.py'):
|
||||
update_var(arg)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
ops.enable_eager_execution()
|
||||
|
@ -341,6 +341,7 @@ cc_library(
|
||||
srcs = ["stack_trace.cc"],
|
||||
hdrs = ["stack_trace.h"],
|
||||
deps = [
|
||||
"//tensorflow/core/platform:status",
|
||||
"//tensorflow/core/platform:str_util",
|
||||
"//tensorflow/core/platform:stringpiece",
|
||||
"//tensorflow/core/util:abstract_stack_trace",
|
||||
|
@ -30,6 +30,7 @@ limitations under the License.
|
||||
#include "absl/container/flat_hash_set.h"
|
||||
#include "absl/container/inlined_vector.h"
|
||||
#include "absl/types/optional.h"
|
||||
#include "tensorflow/core/platform/status.h"
|
||||
#include "tensorflow/core/util/managed_stack_trace.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
Loading…
Reference in New Issue
Block a user