Implement async Python stack trace error reporting.
PiperOrigin-RevId: 322211018 Change-Id: I0d02e98748cc79cca04dc71fc828990548fce70f
This commit is contained in:
parent
6ee2d328fd
commit
fa85309fb5
@ -308,6 +308,7 @@ cc_library(
|
||||
"//tensorflow/core:framework",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
"//tensorflow/core/util:abstract_stack_trace",
|
||||
"@com_google_absl//absl/types:span",
|
||||
],
|
||||
)
|
||||
|
@ -26,6 +26,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/framework/types.pb.h"
|
||||
#include "tensorflow/core/platform/casts.h"
|
||||
#include "tensorflow/core/platform/status.h"
|
||||
#include "tensorflow/core/util/abstract_stack_trace.h"
|
||||
|
||||
struct TFE_Op;
|
||||
|
||||
@ -44,6 +45,12 @@ class ImmediateExecutionOperation : public AbstractOperation {
|
||||
// Experimental
|
||||
virtual Status SetUseXla(bool enable) = 0;
|
||||
|
||||
// Set stack trace to be used for potential async error reporting.
|
||||
virtual void SetStackTrace(AbstractStackTrace stack_trace) = 0;
|
||||
|
||||
// Returns the stack trace set by `SetStackTrace` if exists.
|
||||
virtual absl::optional<AbstractStackTrace> GetStackTrace() = 0;
|
||||
|
||||
// For LLVM style RTTI.
|
||||
static bool classof(const AbstractOperation* ptr) {
|
||||
return ptr->getKind() == kEager || ptr->getKind() == kTfrt;
|
||||
|
@ -163,6 +163,7 @@ tf_cuda_library(
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core/platform:platform_port",
|
||||
"//tensorflow/core/util:abstract_stack_trace",
|
||||
] + select({
|
||||
"//tensorflow:android": [
|
||||
"//tensorflow/core:portable_tensorflow_lib_lite",
|
||||
|
@ -306,6 +306,7 @@ Status EagerOperation::Reset(
|
||||
}
|
||||
attrs_.Reset(op);
|
||||
use_xla_ = false;
|
||||
stack_trace_.reset();
|
||||
is_function_ = is_function;
|
||||
cancellation_manager_ = nullptr;
|
||||
executor_ = executor ? executor : &ctx_.Executor();
|
||||
|
@ -29,6 +29,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/framework/cancellation.h"
|
||||
#include "tensorflow/core/framework/device_attributes.pb.h"
|
||||
#include "tensorflow/core/framework/op_def.pb.h"
|
||||
#include "tensorflow/core/util/abstract_stack_trace.h"
|
||||
#include "tensorflow/core/util/device_name_utils.h"
|
||||
|
||||
namespace tensorflow {
|
||||
@ -120,6 +121,14 @@ class EagerOperation : public ImmediateExecutionOperation {
|
||||
|
||||
Status SetUseXla(bool enable) override;
|
||||
|
||||
void SetStackTrace(AbstractStackTrace stack_trace) override {
|
||||
stack_trace_ = stack_trace;
|
||||
}
|
||||
|
||||
absl::optional<AbstractStackTrace> GetStackTrace() override {
|
||||
return stack_trace_;
|
||||
}
|
||||
|
||||
Status Reset(const char* op, const char* device_name, bool remote,
|
||||
EagerExecutor* executor,
|
||||
const absl::optional<EagerRemoteFunctionParams>
|
||||
@ -218,6 +227,7 @@ class EagerOperation : public ImmediateExecutionOperation {
|
||||
VariantDevice device_;
|
||||
|
||||
bool use_xla_ = false;
|
||||
absl::optional<AbstractStackTrace> stack_trace_;
|
||||
bool is_function_; // Conceptually const, but can't be because of Reset
|
||||
bool colocation_exempt_;
|
||||
CancellationManager* cancellation_manager_ = nullptr; // Not owned.
|
||||
|
@ -634,7 +634,7 @@ Status EagerLocalExecute(EagerOperation* op, TensorHandle** retvals,
|
||||
auto node = absl::make_unique<AsyncExecuteNode>(
|
||||
&ctx, op->Inputs(), op->remote_func_params(), std::move(kernel),
|
||||
graph_collector, op->GetCancellationManager(),
|
||||
absl::Span<TensorHandle*>(retvals, num_outputs));
|
||||
absl::Span<TensorHandle*>(retvals, num_outputs), op->GetStackTrace());
|
||||
// Release the inputs from the eager operation since the AsyncExecuteNode
|
||||
// would have taken ownership. This allows the inputs to be forwarded if
|
||||
// possible.
|
||||
|
@ -150,14 +150,16 @@ class AsyncExecuteNode : public EagerNode {
|
||||
core::RefCountPtr<KernelAndDevice> kernel,
|
||||
GraphCollector* graph_collector,
|
||||
CancellationManager* cancellation_manager,
|
||||
absl::Span<TensorHandle*> retvals)
|
||||
absl::Span<TensorHandle*> retvals,
|
||||
absl::optional<AbstractStackTrace> stack_trace)
|
||||
: EagerNode(),
|
||||
ctx_(ctx),
|
||||
inputs_(inputs),
|
||||
remote_func_params_(remote_func_params),
|
||||
kernel_(std::move(kernel)),
|
||||
graph_collector_(graph_collector),
|
||||
cancellation_manager_(cancellation_manager) {
|
||||
cancellation_manager_(cancellation_manager),
|
||||
stack_trace_(stack_trace) {
|
||||
// Copy the output handles, since the container for them might get
|
||||
// destroyed.
|
||||
for (auto handle : retvals) {
|
||||
@ -194,10 +196,14 @@ class AsyncExecuteNode : public EagerNode {
|
||||
}
|
||||
++i;
|
||||
}
|
||||
const Status status = EagerKernelExecute(
|
||||
Status status = EagerKernelExecute(
|
||||
ctx_, inputs_, remote_func_params_, kernel_, graph_collector_,
|
||||
cancellation_manager_, absl::MakeSpan(retvals_));
|
||||
if (!status.ok()) {
|
||||
if (stack_trace_.has_value()) {
|
||||
status = Status(status.code(), status.error_message(),
|
||||
stack_trace_->ToStackFrames());
|
||||
}
|
||||
Abort(status);
|
||||
return status;
|
||||
}
|
||||
@ -227,6 +233,7 @@ class AsyncExecuteNode : public EagerNode {
|
||||
core::RefCountPtr<KernelAndDevice> kernel_;
|
||||
GraphCollector* graph_collector_;
|
||||
CancellationManager* const cancellation_manager_;
|
||||
absl::optional<AbstractStackTrace> stack_trace_;
|
||||
absl::InlinedVector<TensorHandle*, 2> retvals_;
|
||||
};
|
||||
|
||||
|
@ -62,9 +62,11 @@ inline const strings::AlphaNum& PrepareForStrCat(const strings::AlphaNum& a) {
|
||||
// to be several layers of additional context.
|
||||
template <typename... Args>
|
||||
void AppendToMessage(::tensorflow::Status* status, Args... args) {
|
||||
std::vector<StackFrame> stack_trace = status->stack_trace();
|
||||
*status = ::tensorflow::Status(
|
||||
status->code(),
|
||||
::tensorflow::strings::StrCat(status->error_message(), "\n\t", args...));
|
||||
::tensorflow::strings::StrCat(status->error_message(), "\n\t", args...),
|
||||
std::move(stack_trace));
|
||||
}
|
||||
|
||||
// For propagating errors when calling a function.
|
||||
|
@ -89,11 +89,13 @@ class StatusLogSink : public TFLogSink {
|
||||
|
||||
} // namespace
|
||||
|
||||
Status::Status(tensorflow::error::Code code, StringPiece msg) {
|
||||
Status::Status(tensorflow::error::Code code, tensorflow::StringPiece msg,
|
||||
std::vector<StackFrame>&& stack_trace) {
|
||||
assert(code != tensorflow::error::OK);
|
||||
state_ = std::unique_ptr<State>(new State);
|
||||
state_->code = code;
|
||||
state_->msg = string(msg);
|
||||
state_->stack_trace = std::move(stack_trace);
|
||||
VLOG(5) << "Generated non-OK status: \"" << *this << "\". "
|
||||
<< CurrentStackTrace();
|
||||
}
|
||||
@ -117,6 +119,11 @@ const string& Status::empty_string() {
|
||||
return *empty;
|
||||
}
|
||||
|
||||
const std::vector<StackFrame>& Status::empty_stack_trace() {
|
||||
static std::vector<StackFrame>* empty = new std::vector<StackFrame>();
|
||||
return *empty;
|
||||
}
|
||||
|
||||
string error_name(error::Code code) {
|
||||
switch (code) {
|
||||
case tensorflow::error::OK:
|
||||
|
@ -29,6 +29,13 @@ limitations under the License.
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
// A struct representing a frame in a stack trace.
|
||||
struct StackFrame {
|
||||
std::string file_name;
|
||||
int line_number;
|
||||
std::string function_name;
|
||||
};
|
||||
|
||||
#if defined(__clang__)
|
||||
// Only clang supports warn_unused_result as a type annotation.
|
||||
class TF_MUST_USE_RESULT Status;
|
||||
@ -43,7 +50,15 @@ class Status {
|
||||
|
||||
/// \brief Create a status with the specified error code and msg as a
|
||||
/// human-readable string containing more detailed information.
|
||||
Status(tensorflow::error::Code code, tensorflow::StringPiece msg);
|
||||
Status(tensorflow::error::Code code, tensorflow::StringPiece msg)
|
||||
: Status(code, msg, {}) {}
|
||||
|
||||
/// \brief Create a status with the specified error code, msg, and stack trace
|
||||
/// as a human-readable string containing more detailed information.
|
||||
#ifndef SWIG
|
||||
Status(tensorflow::error::Code code, tensorflow::StringPiece msg,
|
||||
std::vector<StackFrame>&& stack_trace);
|
||||
#endif
|
||||
|
||||
/// Copy the specified status.
|
||||
Status(const Status& s);
|
||||
@ -66,6 +81,10 @@ class Status {
|
||||
return ok() ? empty_string() : state_->msg;
|
||||
}
|
||||
|
||||
const std::vector<StackFrame>& stack_trace() const {
|
||||
return ok() ? empty_stack_trace() : state_->stack_trace;
|
||||
}
|
||||
|
||||
bool operator==(const Status& x) const;
|
||||
bool operator!=(const Status& x) const;
|
||||
|
||||
@ -91,9 +110,11 @@ class Status {
|
||||
|
||||
private:
|
||||
static const std::string& empty_string();
|
||||
static const std::vector<StackFrame>& empty_stack_trace();
|
||||
struct State {
|
||||
tensorflow::error::Code code;
|
||||
std::string msg;
|
||||
std::vector<StackFrame> stack_trace;
|
||||
};
|
||||
// OK status has a `NULL` state_. Otherwise, `state_` points to
|
||||
// a `State` structure containing the error code and message(s)
|
||||
|
@ -61,6 +61,7 @@ filegroup(
|
||||
filegroup(
|
||||
name = "mobile_srcs_only_runtime",
|
||||
srcs = [
|
||||
"abstract_stack_trace.h",
|
||||
"batch_util.cc",
|
||||
"batch_util.h",
|
||||
"bcast.cc",
|
||||
@ -313,6 +314,7 @@ filegroup(
|
||||
filegroup(
|
||||
name = "framework_srcs",
|
||||
srcs = [
|
||||
"abstract_stack_trace.h",
|
||||
"activation_mode.h",
|
||||
"batch_util.h",
|
||||
"bcast.h",
|
||||
@ -437,6 +439,22 @@ cc_library(
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "abstract_stack_trace",
|
||||
hdrs = ["abstract_stack_trace.h"],
|
||||
visibility = [
|
||||
"//tensorflow/c/eager:__pkg__",
|
||||
"//tensorflow/core:__pkg__",
|
||||
"//tensorflow/core/common_runtime/eager:__pkg__",
|
||||
"//tensorflow/core/platform:__pkg__",
|
||||
"//tensorflow/python:__pkg__",
|
||||
"//tensorflow/python/eager:__pkg__",
|
||||
],
|
||||
deps = [
|
||||
"//tensorflow/core/platform:status",
|
||||
],
|
||||
)
|
||||
|
||||
tf_cuda_library(
|
||||
name = "gpu_cuda_alias",
|
||||
hdrs = ["gpu_cuda_alias.h"],
|
||||
|
44
tensorflow/core/util/abstract_stack_trace.h
Normal file
44
tensorflow/core/util/abstract_stack_trace.h
Normal file
@ -0,0 +1,44 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_CORE_UTIL_ABSTRACT_STACK_TRACE_H_
|
||||
#define TENSORFLOW_CORE_UTIL_ABSTRACT_STACK_TRACE_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "tensorflow/core/platform/status.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
// Language agnostic stack trace class. It only saves an id, and language
|
||||
// clients are responsible for managing the actual stack trace objects.
|
||||
class AbstractStackTrace {
|
||||
public:
|
||||
AbstractStackTrace(int id, std::vector<StackFrame> (*to_stack_frames)(int))
|
||||
: id_(id), to_stack_frames_(to_stack_frames) {}
|
||||
|
||||
// Returns stack trace as a vector of `StackFrame`s.
|
||||
std::vector<StackFrame> ToStackFrames() const {
|
||||
return to_stack_frames_(id_);
|
||||
}
|
||||
|
||||
private:
|
||||
int id_;
|
||||
std::vector<StackFrame> (*to_stack_frames_)(int);
|
||||
};
|
||||
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // TENSORFLOW_CORE_UTIL_ABSTRACT_STACK_TRACE_H_
|
@ -5664,9 +5664,11 @@ cc_library(
|
||||
hdrs = ["util/stack_trace.h"],
|
||||
deps = [
|
||||
":py_util",
|
||||
"//tensorflow/core/platform:str_util",
|
||||
"//tensorflow/core/platform:stringpiece",
|
||||
"//tensorflow/core/util:abstract_stack_trace",
|
||||
"//third_party/python_runtime:headers", # buildcleaner: keep
|
||||
"@com_google_absl//absl/base:core_headers",
|
||||
"@com_google_absl//absl/synchronization",
|
||||
"@com_google_absl//absl/types:optional",
|
||||
],
|
||||
)
|
||||
|
@ -56,13 +56,16 @@ cc_library(
|
||||
"//tensorflow/core/platform:logging",
|
||||
"//tensorflow/core/platform:types",
|
||||
"//tensorflow/core/profiler/lib:traceme",
|
||||
"//tensorflow/core/util:abstract_stack_trace",
|
||||
"//tensorflow/python:cpp_python_util",
|
||||
"//tensorflow/python:ndarray_tensor",
|
||||
"//tensorflow/python:ndarray_tensor_bridge",
|
||||
"//tensorflow/python:numpy_lib",
|
||||
"//tensorflow/python:py_exception_registry",
|
||||
"//tensorflow/python:py_seq_tensor",
|
||||
"//tensorflow/python:py_util",
|
||||
"//tensorflow/python:safe_ptr",
|
||||
"//tensorflow/python:stack_trace",
|
||||
"//third_party/py/numpy:headers",
|
||||
"//third_party/python_runtime:headers",
|
||||
"@com_google_absl//absl/container:flat_hash_map",
|
||||
|
@ -30,6 +30,7 @@ from tensorflow.python.eager import test
|
||||
from tensorflow.python.framework import config
|
||||
from tensorflow.python.framework import constant_op
|
||||
from tensorflow.python.framework import dtypes
|
||||
from tensorflow.python.framework import errors_impl
|
||||
from tensorflow.python.framework import ops
|
||||
from tensorflow.python.framework import tensor_shape
|
||||
from tensorflow.python.framework import test_util
|
||||
@ -480,6 +481,24 @@ class OpsTest(test_util.TensorFlowTestCase, parameterized.TestCase):
|
||||
self.assertIs(weak_x(), None)
|
||||
self.assertIs(weak_y(), None)
|
||||
|
||||
def testAsyncExceptionStackTrace(self):
|
||||
config.set_synchronous_execution(False)
|
||||
|
||||
def exception_originated_from_here():
|
||||
# Invalid shapes for matmul.
|
||||
return math_ops.matmul([[1]], [[2], [3]])
|
||||
|
||||
# In sync mode, an exception would have been raised here but since this is
|
||||
# in async, the exception will be raised next.
|
||||
x = exception_originated_from_here()
|
||||
|
||||
with self.assertRaisesRegex(errors_impl.InvalidArgumentError,
|
||||
'in exception_originated_from_here'):
|
||||
x.numpy()
|
||||
|
||||
context.async_clear_error()
|
||||
config.set_synchronous_execution(True)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test.main()
|
||||
|
@ -41,10 +41,13 @@ limitations under the License.
|
||||
#include "tensorflow/core/platform/protobuf.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
#include "tensorflow/core/profiler/lib/traceme.h"
|
||||
#include "tensorflow/core/util/abstract_stack_trace.h"
|
||||
#include "tensorflow/python/eager/pywrap_gradient_exclusions.h"
|
||||
#include "tensorflow/python/eager/pywrap_tensor.h"
|
||||
#include "tensorflow/python/eager/pywrap_tfe.h"
|
||||
#include "tensorflow/python/lib/core/py_util.h"
|
||||
#include "tensorflow/python/lib/core/safe_ptr.h"
|
||||
#include "tensorflow/python/util/stack_trace.h"
|
||||
#include "tensorflow/python/util/util.h"
|
||||
|
||||
using tensorflow::string;
|
||||
@ -854,10 +857,14 @@ void TFE_Py_ExecuteCancelable(TFE_Context* ctx, const char* device_name,
|
||||
TF_Status* out_status) {
|
||||
tensorflow::profiler::TraceMe activity(
|
||||
"TFE_Py_ExecuteCancelable", tensorflow::profiler::TraceMeLevel::kInfo);
|
||||
|
||||
TFE_Op* op = GetOp(ctx, op_name, device_name, out_status);
|
||||
|
||||
auto cleaner = tensorflow::gtl::MakeCleanup([ctx, op] { ReturnOp(ctx, op); });
|
||||
if (!out_status->status.ok()) return;
|
||||
|
||||
tensorflow::unwrap(op)->SetStackTrace(tensorflow::GetStackTrace());
|
||||
|
||||
for (int i = 0; i < inputs->size() && out_status->status.ok(); ++i) {
|
||||
TFE_OpAddInput(op, inputs->at(i), out_status);
|
||||
}
|
||||
@ -970,14 +977,54 @@ void RaiseFallbackException(const char* message) {
|
||||
.data());
|
||||
}
|
||||
|
||||
// Format and return `status`' error message with the attached stack trace if
|
||||
// available. `status` must have an error.
|
||||
std::string FormatErrorStatusStackTrace(const tensorflow::Status& status) {
|
||||
tensorflow::DCheckPyGilState();
|
||||
DCHECK(!status.ok());
|
||||
|
||||
if (status.stack_trace().empty()) return status.error_message();
|
||||
|
||||
const std::vector<tensorflow::StackFrame>& stack_trace = status.stack_trace();
|
||||
|
||||
PyObject* linecache = PyImport_ImportModule("linecache");
|
||||
PyObject* getline =
|
||||
PyObject_GetAttr(linecache, PyUnicode_FromString("getline"));
|
||||
DCHECK(getline);
|
||||
|
||||
std::ostringstream result;
|
||||
result << "Exception originated from\n\n";
|
||||
|
||||
for (const tensorflow::StackFrame& stack_frame : stack_trace) {
|
||||
PyObject* line_str_obj = PyObject_CallFunction(
|
||||
getline, const_cast<char*>("si"), stack_frame.file_name.c_str(),
|
||||
stack_frame.line_number);
|
||||
tensorflow::StringPiece line_str = TFE_GetPythonString(line_str_obj);
|
||||
tensorflow::str_util::RemoveWhitespaceContext(&line_str);
|
||||
result << " File \"" << stack_frame.file_name << "\", line "
|
||||
<< stack_frame.line_number << ", in " << stack_frame.function_name
|
||||
<< '\n';
|
||||
|
||||
if (!line_str.empty()) result << " " << line_str << '\n';
|
||||
Py_XDECREF(line_str_obj);
|
||||
}
|
||||
|
||||
Py_DecRef(getline);
|
||||
Py_DecRef(linecache);
|
||||
|
||||
result << '\n' << status.error_message();
|
||||
return result.str();
|
||||
}
|
||||
|
||||
int MaybeRaiseExceptionFromTFStatus(TF_Status* status, PyObject* exception) {
|
||||
if (status->status.ok()) return 0;
|
||||
const char* msg = TF_Message(status);
|
||||
if (exception == nullptr) {
|
||||
tensorflow::mutex_lock l(exception_class_mutex);
|
||||
if (exception_class != nullptr) {
|
||||
tensorflow::Safe_PyObjectPtr val(
|
||||
Py_BuildValue("si", msg, TF_GetCode(status)));
|
||||
tensorflow::Safe_PyObjectPtr val(Py_BuildValue(
|
||||
"si", FormatErrorStatusStackTrace(status->status).c_str(),
|
||||
TF_GetCode(status)));
|
||||
if (PyErr_Occurred()) {
|
||||
// NOTE: This hides the actual error (i.e. the reason `status` was not
|
||||
// TF_OK), but there is nothing we can do at this point since we can't
|
||||
@ -1003,7 +1050,8 @@ int MaybeRaiseExceptionFromStatus(const tensorflow::Status& status,
|
||||
if (exception == nullptr) {
|
||||
tensorflow::mutex_lock l(exception_class_mutex);
|
||||
if (exception_class != nullptr) {
|
||||
tensorflow::Safe_PyObjectPtr val(Py_BuildValue("si", msg, status.code()));
|
||||
tensorflow::Safe_PyObjectPtr val(Py_BuildValue(
|
||||
"si", FormatErrorStatusStackTrace(status).c_str(), status.code()));
|
||||
PyErr_SetObject(exception_class, val.get());
|
||||
return -1;
|
||||
} else {
|
||||
@ -3527,6 +3575,8 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject* args) {
|
||||
}
|
||||
|
||||
TFE_Op* op = GetOp(ctx, op_name, op_exec_info.device_name, status);
|
||||
tensorflow::unwrap(op)->SetStackTrace(tensorflow::GetStackTrace());
|
||||
|
||||
auto cleaner = tensorflow::gtl::MakeCleanup([status, ctx, op] {
|
||||
ReturnStatus(status);
|
||||
ReturnOp(ctx, op);
|
||||
@ -3746,11 +3796,14 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject* args) {
|
||||
if (!status->status.ok()) {
|
||||
// Augment the status with the op_name for easier debugging similar to
|
||||
// TFE_Py_Execute.
|
||||
TF_SetStatus(status, TF_GetCode(status),
|
||||
tensorflow::strings::StrCat(
|
||||
TF_Message(status),
|
||||
" [Op:", TFE_GetPythonString(op_exec_info.op_name), "]")
|
||||
.c_str());
|
||||
std::vector<tensorflow::StackFrame> stack_trace =
|
||||
status->status.stack_trace();
|
||||
status->status = tensorflow::Status(
|
||||
status->status.code(),
|
||||
tensorflow::strings::StrCat(
|
||||
TF_Message(status),
|
||||
" [Op:", TFE_GetPythonString(op_exec_info.op_name), "]"),
|
||||
std::move(stack_trace));
|
||||
|
||||
MaybeRaiseExceptionFromTFStatus(status, nullptr);
|
||||
return nullptr;
|
||||
|
@ -15,6 +15,9 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/python/util/stack_trace.h"
|
||||
|
||||
#include "tensorflow/core/platform/str_util.h"
|
||||
#include "tensorflow/core/platform/stringpiece.h"
|
||||
|
||||
namespace {
|
||||
|
||||
// Returns C string from a Python string object. Handles Python2/3 strings.
|
||||
@ -31,22 +34,33 @@ const char* GetPythonString(PyObject* o) {
|
||||
return PyBytes_AsString(o);
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace tensorflow {
|
||||
std::string StackTrace::ToString() const {
|
||||
DCheckPyGilState();
|
||||
|
||||
std::ostringstream result;
|
||||
std::vector<StackFrame> StackTrace::ToStackFrames() const {
|
||||
std::vector<StackFrame> result;
|
||||
result.reserve(size_);
|
||||
|
||||
for (int i = size_ - 1; i >= 0; --i) {
|
||||
result << " File \"" << PyUnicode_AsUTF8(code_objs_[i]->co_filename)
|
||||
<< "\", line "
|
||||
<< PyCode_Addr2Line(code_objs_[i], last_instructions_[i]) << ", in "
|
||||
<< GetPythonString(code_objs_[i]->co_name)
|
||||
<< "\n <source line unimplemented>\n";
|
||||
// TODO(kkb): Add source code line. See tf_stack.cc's
|
||||
// FrameSummary::line() function.
|
||||
const char* file_name = GetPythonString(code_objs_[i]->co_filename);
|
||||
const int line_number =
|
||||
PyCode_Addr2Line(code_objs_[i], last_instructions_[i]);
|
||||
result.emplace_back(StackFrame{file_name, line_number,
|
||||
GetPythonString(code_objs_[i]->co_name)});
|
||||
}
|
||||
return result.str();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
StackTrace* StackTraceManager::Get(int id) {
|
||||
DCheckPyGilState();
|
||||
if (next_id_ - id > kStackTraceCircularBufferSize) return nullptr;
|
||||
|
||||
return &stack_traces_[id & (kStackTraceCircularBufferSize - 1)];
|
||||
}
|
||||
|
||||
StackTraceManager* const stack_trace_manager = new StackTraceManager();
|
||||
|
||||
} // namespace tensorflow
|
||||
|
@ -25,6 +25,8 @@ limitations under the License.
|
||||
|
||||
#include "absl/base/attributes.h"
|
||||
#include "absl/base/optimization.h"
|
||||
#include "absl/types/optional.h"
|
||||
#include "tensorflow/core/util/abstract_stack_trace.h"
|
||||
#include "tensorflow/python/lib/core/py_util.h"
|
||||
|
||||
namespace tensorflow {
|
||||
@ -82,10 +84,8 @@ class StackTrace final {
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Returns string representation of the captured stack trace.
|
||||
std::string ToString() const;
|
||||
|
||||
// TODO(kkb): Implement structured stack trace object getter.
|
||||
// Returns a structured representation of the captured stack trace.
|
||||
std::vector<StackFrame> ToStackFrames() const;
|
||||
|
||||
private:
|
||||
std::array<PyCodeObject*, kMaxDepth> code_objs_;
|
||||
@ -103,6 +103,53 @@ class StackTrace final {
|
||||
StackTrace& operator=(const StackTrace&) = delete;
|
||||
};
|
||||
|
||||
// A class that manages Python stack traces in a circular buffer. Users can
|
||||
// insert stack trace entries and retrive them by ids.
|
||||
class StackTraceManager {
|
||||
public:
|
||||
static constexpr int kStackTraceCircularBufferSize = 1024;
|
||||
|
||||
// Captures the current Python stack trace and returns an id.
|
||||
// Python GIL must be acquired beforehand.
|
||||
ABSL_MUST_USE_RESULT
|
||||
ABSL_ATTRIBUTE_HOT
|
||||
int Capture() {
|
||||
DCheckPyGilState();
|
||||
const int id = next_id_++;
|
||||
const int index = id & (kStackTraceCircularBufferSize - 1);
|
||||
stack_traces_[index] = StackTrace::Capture();
|
||||
return id;
|
||||
}
|
||||
|
||||
// Retrieve captured Python stack trace by id. Returns `nullptr` if the
|
||||
// requested stack trace is evicted from the circular buffer.
|
||||
// Python GIL must be acquired beforehand.
|
||||
ABSL_MUST_USE_RESULT
|
||||
StackTrace* Get(int id);
|
||||
|
||||
private:
|
||||
int next_id_ = 0;
|
||||
std::array<StackTrace, kStackTraceCircularBufferSize> stack_traces_;
|
||||
};
|
||||
|
||||
// Singleton StackTraceManager.
|
||||
extern StackTraceManager* const stack_trace_manager;
|
||||
|
||||
// Returns Python stack trace object that can be converted to string.
|
||||
// Note that the actual stack trace is kept in a circular buffer for string
|
||||
// conversion could fail if it's evicted before.
|
||||
// Python GIL must be acquired beforehand.
|
||||
inline AbstractStackTrace GetStackTrace() {
|
||||
DCheckPyGilState();
|
||||
return AbstractStackTrace(stack_trace_manager->Capture(), [](int id) {
|
||||
PyGILState_STATE gstate = PyGILState_Ensure();
|
||||
std::vector<StackFrame> result =
|
||||
stack_trace_manager->Get(id)->ToStackFrames();
|
||||
PyGILState_Release(gstate);
|
||||
return result;
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // TENSORFLOW_PYTHON_UTIL_STACK_TRACE_H_
|
||||
|
Loading…
Reference in New Issue
Block a user