minor spelling tweaks
This commit is contained in:
parent
70cdf91366
commit
775a828aad
@ -33,13 +33,13 @@ END
|
||||
in_arg {
|
||||
name: "l1"
|
||||
description: <<END
|
||||
L1 regulariation. Must be a scalar.
|
||||
L1 regularization. Must be a scalar.
|
||||
END
|
||||
}
|
||||
in_arg {
|
||||
name: "l2"
|
||||
description: <<END
|
||||
L2 regulariation. Must be a scalar.
|
||||
L2 regularization. Must be a scalar.
|
||||
END
|
||||
}
|
||||
in_arg {
|
||||
|
@ -33,13 +33,13 @@ END
|
||||
in_arg {
|
||||
name: "l1"
|
||||
description: <<END
|
||||
L1 regulariation. Must be a scalar.
|
||||
L1 regularization. Must be a scalar.
|
||||
END
|
||||
}
|
||||
in_arg {
|
||||
name: "l2"
|
||||
description: <<END
|
||||
L2 shrinkage regulariation. Must be a scalar.
|
||||
L2 shrinkage regularization. Must be a scalar.
|
||||
END
|
||||
}
|
||||
in_arg {
|
||||
|
@ -35,7 +35,7 @@ END
|
||||
in_arg {
|
||||
name: "min_node_weight"
|
||||
description: <<END
|
||||
mininum avg of hessians in a node before required for the node to be considered for splitting.
|
||||
minimum avg of hessians in a node before required for the node to be considered for splitting.
|
||||
END
|
||||
}
|
||||
out_arg {
|
||||
|
@ -34,7 +34,7 @@ END
|
||||
in_arg {
|
||||
name: "min_node_weight"
|
||||
description: <<END
|
||||
mininum avg of hessians in a node before required for the node to be considered for splitting.
|
||||
minimum avg of hessians in a node before required for the node to be considered for splitting.
|
||||
END
|
||||
}
|
||||
out_arg {
|
||||
@ -90,4 +90,4 @@ In this manner, the output is the best split per features and per node, so that
|
||||
The length of output lists are all of the same length, `num_features`.
|
||||
The output shapes are compatible in a way that the first dimension of all tensors of all lists are the same and equal to the number of possible split nodes for each feature.
|
||||
END
|
||||
}
|
||||
}
|
||||
|
@ -47,7 +47,7 @@ END
|
||||
in_arg {
|
||||
name: "min_node_weight"
|
||||
description: <<END
|
||||
mininum avg of hessians in a node before required for the node to be considered for splitting.
|
||||
minimum avg of hessians in a node before required for the node to be considered for splitting.
|
||||
END
|
||||
}
|
||||
out_arg {
|
||||
|
@ -18,7 +18,7 @@ weights: the canonical form of weights that can be used for saving
|
||||
biases: the canonical form of biases that can be used for saving
|
||||
and restoration. They are more likely to be compatible across different
|
||||
generations.
|
||||
num_params_weigths: number of weight parameter matrix for all layers.
|
||||
num_params_weights: number of weight parameter matrix for all layers.
|
||||
num_params_biases: number of bias parameter vector for all layers.
|
||||
rnn_mode: Indicates the type of the RNN model.
|
||||
input_mode: Indicate whether there is a linear projection between the input and
|
||||
|
@ -12,7 +12,7 @@ biases.
|
||||
num_layers: Specifies the number of layers in the RNN model.
|
||||
num_units: Specifies the size of the hidden state.
|
||||
input_size: Specifies the size of the input state.
|
||||
num_params_weigths: number of weight parameter matrix for all layers.
|
||||
num_params_weights: number of weight parameter matrix for all layers.
|
||||
num_params_biases: number of bias parameter vector for all layers.
|
||||
weights: the canonical form of weights that can be used for saving
|
||||
and restoration. They are more likely to be compatible across different
|
||||
|
@ -20,7 +20,7 @@ Supported values:
|
||||
element is a bit which is set to 1 if the input tensor has an
|
||||
infinity or nan value, or zero otherwise.
|
||||
|
||||
3 (CONCISE_HEALTH): Ouput a float32/64 tensor of shape [5]. The 1st
|
||||
3 (CONCISE_HEALTH): Output a float32/64 tensor of shape [5]. The 1st
|
||||
element is the tensor_id, if provided, and -1 otherwise. The
|
||||
remaining four slots are the total number of elements, -infs,
|
||||
+infs, and nans in the input tensor respectively.
|
||||
|
@ -4,7 +4,7 @@ op {
|
||||
in_arg {
|
||||
name: "tree_ensemble_handle"
|
||||
description: <<END
|
||||
Handle to the tree ensemble resouce.
|
||||
Handle to the tree ensemble resource.
|
||||
END
|
||||
}
|
||||
out_arg {
|
||||
|
@ -33,13 +33,13 @@ END
|
||||
in_arg {
|
||||
name: "l1"
|
||||
description: <<END
|
||||
L1 regulariation. Must be a scalar.
|
||||
L1 regularization. Must be a scalar.
|
||||
END
|
||||
}
|
||||
in_arg {
|
||||
name: "l2"
|
||||
description: <<END
|
||||
L2 regulariation. Must be a scalar.
|
||||
L2 regularization. Must be a scalar.
|
||||
END
|
||||
}
|
||||
in_arg {
|
||||
|
@ -33,13 +33,13 @@ END
|
||||
in_arg {
|
||||
name: "l1"
|
||||
description: <<END
|
||||
L1 regulariation. Must be a scalar.
|
||||
L1 regularization. Must be a scalar.
|
||||
END
|
||||
}
|
||||
in_arg {
|
||||
name: "l2"
|
||||
description: <<END
|
||||
L2 shrinkage regulariation. Must be a scalar.
|
||||
L2 shrinkage regularization. Must be a scalar.
|
||||
END
|
||||
}
|
||||
in_arg {
|
||||
|
@ -45,7 +45,7 @@ END
|
||||
in_arg {
|
||||
name: "l2"
|
||||
description: <<END
|
||||
L2 shrinkage regulariation. Must be a scalar.
|
||||
L2 shrinkage regularization. Must be a scalar.
|
||||
END
|
||||
}
|
||||
in_arg {
|
||||
|
@ -45,7 +45,7 @@ END
|
||||
in_arg {
|
||||
name: "l2"
|
||||
description: <<END
|
||||
L2 shrinkage regulariation. Must be a scalar.
|
||||
L2 shrinkage regularization. Must be a scalar.
|
||||
END
|
||||
}
|
||||
in_arg {
|
||||
|
@ -5,7 +5,7 @@ op {
|
||||
name: "superdiag"
|
||||
description: <<END
|
||||
Tensor of shape `[..., 1, M]`, representing superdiagonals of
|
||||
tri-diagonal matrices to the left of multiplication. Last element is ingored.
|
||||
tri-diagonal matrices to the left of multiplication. Last element is ignored.
|
||||
END
|
||||
}
|
||||
in_arg {
|
||||
@ -19,7 +19,7 @@ END
|
||||
name: "subdiag"
|
||||
description: <<END
|
||||
Tensor of shape `[..., 1, M]`, representing subdiagonals of tri-diagonal
|
||||
matrices to the left of multiplication. First element is ingored.
|
||||
matrices to the left of multiplication. First element is ignored.
|
||||
END
|
||||
}
|
||||
in_arg {
|
||||
|
@ -1004,7 +1004,7 @@ void ColocationGraph::GetSoftDeviceCandidates(
|
||||
|
||||
// Failed to find supported devices that don't violate resource devices.
|
||||
// Try finding some devices that violated resource devices.
|
||||
// If we succceed, we will log a warning below.
|
||||
// If we succeed, we will log a warning below.
|
||||
soft_device_name = root_member.GetSoftDeviceName();
|
||||
device_set_.FindMatchingDevices(soft_device_name, possible_devices);
|
||||
if (!possible_devices->empty()) {
|
||||
|
@ -704,7 +704,7 @@ class TestTFFileSystem : public ::tensorflow::NullFileSystem {
|
||||
::tensorflow::Tensor data_tensor_;
|
||||
};
|
||||
|
||||
// A test TF environent that checks that the environment was used.
|
||||
// A test TF environment that checks that the environment was used.
|
||||
class TestTFEnvironment : public ::tensorflow::EnvWrapper {
|
||||
public:
|
||||
using tf_base = ::tensorflow::EnvWrapper;
|
||||
|
@ -125,7 +125,7 @@ Status DynamicDeviceMgr::AddDevices(
|
||||
if (device_map_.find(d->name()) != device_map_.end()) {
|
||||
return errors::InvalidArgument(
|
||||
"Trying to add device ", d->name(),
|
||||
" to manager but its name conflicts with an existing deivce.");
|
||||
" to manager but its name conflicts with an existing device.");
|
||||
}
|
||||
// Register under the (1) full name and (2) canonical name.
|
||||
for (const string& name :
|
||||
|
@ -101,7 +101,7 @@ TEST(DynamicDeviceMgrTest, AddRepeatedDeviceToMgr) {
|
||||
added_devices.emplace_back(std::move(d1));
|
||||
Status s = dm->AddDevices(std::move(added_devices));
|
||||
EXPECT_TRUE(absl::StrContains(s.error_message(),
|
||||
"name conflicts with an existing deivce"));
|
||||
"name conflicts with an existing device"));
|
||||
}
|
||||
|
||||
TEST(DynamicDeviceMgrTest, RemoveNonExistingDeviceFromMgr) {
|
||||
|
@ -609,7 +609,7 @@ class EagerContext : public core::RefCounted {
|
||||
|
||||
uint64 context_id_ GUARDED_BY(remote_state_mu_);
|
||||
// The view id of an eager context should be set to 0 when context is created,
|
||||
// and continously incremented when context with the same context_id gets
|
||||
// and continuously incremented when context with the same context_id gets
|
||||
// updated. The view id should be consistent between master and workers.
|
||||
uint64 context_view_id_ GUARDED_BY(remote_state_mu_);
|
||||
std::vector<string> remote_contexts_;
|
||||
|
@ -190,7 +190,7 @@ class EagerExecutor {
|
||||
void NotifyWaiters(uint64 id) EXCLUSIVE_LOCKS_REQUIRED(node_queue_mutex_);
|
||||
|
||||
// Starts execution of pending EagerNodes. This function loops till executor
|
||||
// state_ is set to kShutDown. If any errors are encontered, these are set
|
||||
// state_ is set to kShutDown. If any errors are encountered, these are set
|
||||
// inside `status_`. The loop blocks anytime there are no pending nodes, or if
|
||||
// `status_` is not ok.
|
||||
void Run();
|
||||
|
@ -34,7 +34,7 @@ class EagerOpRewrite {
|
||||
|
||||
virtual ~EagerOpRewrite() {}
|
||||
|
||||
// To be implemnted by an Eager op rewrite pass.
|
||||
// To be implemented by an Eager op rewrite pass.
|
||||
virtual Status Run(EagerOperation* orig_op,
|
||||
std::unique_ptr<tensorflow::EagerOperation>* out_op) = 0;
|
||||
|
||||
|
@ -326,7 +326,7 @@ Status KernelAndDeviceFunc::Run(
|
||||
} else {
|
||||
opts = absl::make_unique<FunctionLibraryRuntime::Options>();
|
||||
if (get_op_id_ && is_cross_process_) {
|
||||
// If the function is a cross-process function and the remote excution
|
||||
// If the function is a cross-process function and the remote execution
|
||||
// goes through eager service, create an eager op id for the function.
|
||||
opts->op_id = get_op_id_();
|
||||
}
|
||||
|
@ -255,7 +255,7 @@ void BuildConcurrentAddAssign(Graph* g) {
|
||||
auto one = test::graph::Constant(g, V(1.0));
|
||||
// A variable holds one float.
|
||||
auto var = test::graph::Var(g, DT_FLOAT, TensorShape({}));
|
||||
// Initilize the variable with 1.0.
|
||||
// Initialize the variable with 1.0.
|
||||
auto init = test::graph::Assign(g, var, one);
|
||||
// Output
|
||||
auto out = test::graph::Send(g, var, "out", ALICE, kIncarnation, BOB);
|
||||
|
@ -1544,7 +1544,7 @@ std::vector<string> InputDevices(const Node& caller) {
|
||||
return input_devices;
|
||||
}
|
||||
|
||||
// Place input nodes on the same device as the correspinding caller input
|
||||
// Place input nodes on the same device as the corresponding caller input
|
||||
// node. Do not specify any placement for all other nodes.
|
||||
class DefaultFunctionBodyPlacer : public InlinedFunctionBodyPlacer {
|
||||
public:
|
||||
@ -1593,7 +1593,7 @@ class SingleDeviceFunctionBodyPlacer : public InlinedFunctionBodyPlacer {
|
||||
const string caller_device_;
|
||||
};
|
||||
|
||||
// Place input nodes on the same device as the correspinding caller input
|
||||
// Place input nodes on the same device as the corresponding caller input
|
||||
// node. Do not place output node. Place control nodes on the same device as
|
||||
// caller node. For all function body nodes overrides job, replica and task
|
||||
// parts of the device assignment to match function caller node.
|
||||
|
@ -1312,7 +1312,7 @@ int GetConstantFoldingCounter() {
|
||||
return counter;
|
||||
}
|
||||
}
|
||||
LOG(FATAL) << "Should have found a node that replcaed add";
|
||||
LOG(FATAL) << "Should have found a node that replaced add";
|
||||
}
|
||||
|
||||
TEST_F(FunctionLibraryRuntimeTest, OptimizeGraph) {
|
||||
@ -2129,7 +2129,7 @@ TEST(OptimizationTest, RemoveListArrayConverter) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(OptimizationTest, RemoveListArrayConverter_WithContolDeps) {
|
||||
TEST(OptimizationTest, RemoveListArrayConverter_WithControlDeps) {
|
||||
auto func = FDH::Create(
|
||||
// Name
|
||||
"Test",
|
||||
|
@ -1837,7 +1837,7 @@ void GPUKernelTracker::RecordTerminated(uint64 queued_count) {
|
||||
// advance the completed frontier to the just-completed PendingKernel. In
|
||||
// practice we occasionally see the termination callbacks arrive out of
|
||||
// order probably because of thread scheduling. Eventually we may support
|
||||
// out-of- order completion involving multple compute streams so here we
|
||||
// out-of- order completion involving multiple compute streams so here we
|
||||
// follow a conservative approach and wait for every single callback to
|
||||
// arrive before advancing the frontier.
|
||||
while (true) {
|
||||
|
@ -157,7 +157,7 @@ class BaseGPUDevice : public LocalDevice {
|
||||
int32 pending_cap_ = 0;
|
||||
bool timestamped_allocator_ = false;
|
||||
|
||||
// Initialize scractch buffers used by Eigen.
|
||||
// Initialize scratch buffers used by Eigen.
|
||||
Status InitScratchBuffers();
|
||||
|
||||
void ReinitializeDevice(OpKernelContext* context, PerOpGpuDevice* device,
|
||||
|
@ -56,7 +56,7 @@ namespace tensorflow {
|
||||
//
|
||||
// Assuming we configure the Session to create one BaseGPUDevice per GPU
|
||||
// hardware, then setting GPUOptions::visible_device_list to "2,0" will create
|
||||
// the following mappting between TF GPU id and platform GPU id:
|
||||
// the following mapping between TF GPU id and platform GPU id:
|
||||
//
|
||||
// TF GPU id -> platform GPU ID
|
||||
// 0 (i.e. /device:GPU:0) -> 2
|
||||
@ -67,7 +67,7 @@ namespace tensorflow {
|
||||
//
|
||||
// On the other hand, if we configure it to create 2 BaseGPUDevice per GPU
|
||||
// hardware, then setting GPUOptions::visible_device_list to "2,0" will create
|
||||
// the following mappting between TF GPU id and platform GPU id:
|
||||
// the following mapping between TF GPU id and platform GPU id:
|
||||
//
|
||||
// TF GPU id -> platform GPU ID
|
||||
// 0 (i.e. /device:GPU:0) -> 2
|
||||
|
@ -615,7 +615,7 @@ Status GraphExecutionState::InitBaseGraph(std::unique_ptr<Graph>&& new_graph) {
|
||||
session_options_->config.allow_soft_placement(),
|
||||
session_options_ != nullptr &&
|
||||
session_options_->config.log_device_placement());
|
||||
// TODO(mrry): Consider making the Placer cancelable.
|
||||
// TODO(mrry): Consider making the Placer cancellable.
|
||||
TF_RETURN_IF_ERROR(placer.Run());
|
||||
|
||||
TF_RETURN_IF_ERROR(OptimizationPassRegistry::Global()->RunGrouping(
|
||||
|
@ -53,7 +53,7 @@ void RecordTFDataFingerprint(const string& name);
|
||||
// Records the number of independent graph changes resulting from the
|
||||
// application of a tf.data optimization.
|
||||
//
|
||||
// The `name` argument identifies the optimization (e.g. "noop_eliminiation").
|
||||
// The `name` argument identifies the optimization (e.g. "noop_elimination").
|
||||
void RecordTFDataOptimization(const string& name, int64 num_changes);
|
||||
|
||||
// Records parsing of dense tensor features.
|
||||
|
@ -73,7 +73,7 @@ class GraphOptimizationPass {
|
||||
string name() const { return name_; }
|
||||
|
||||
private:
|
||||
// The name of the opitimization pass, which is the same as the inherited
|
||||
// The name of the optimization pass, which is the same as the inherited
|
||||
// class name.
|
||||
string name_;
|
||||
};
|
||||
|
@ -41,7 +41,7 @@ Status PartitionFunctionGraph(
|
||||
//
|
||||
// More specifically, this function
|
||||
// (1) rewrites the indices of the `Arg` and `Retval` nodes placed
|
||||
// on a particular device. When a function is parittioned each
|
||||
// on a particular device. When a function is partitioned each
|
||||
// partition, `subgraph`, get a subset of the arguments and
|
||||
// return values. The `index` attributes of these _Arg and _Retval
|
||||
// nodes reflect the indices of these parameters in the original
|
||||
|
@ -274,7 +274,7 @@ class PlacerTest : public ::testing::Test {
|
||||
RewriterConfig* rewriter_config = graph_opts->mutable_rewrite_options();
|
||||
rewriter_config->set_disable_meta_optimizer(true);
|
||||
|
||||
// Placing nested functions requires go through some PRE_PLACEMNT passes.
|
||||
// Placing nested functions requires go through some PRE_PLACEMENT passes.
|
||||
// Currently, just the IsolateDeepOpsPass.
|
||||
GraphOptimizationPassOptions optimization_options;
|
||||
std::unique_ptr<Graph> graph_ptr(graph);
|
||||
@ -1493,7 +1493,7 @@ TEST_F(PlacerTest, TestUnknownAssignedDevice) {
|
||||
|
||||
// Test that placement fails when an op with no registered kernels is
|
||||
// requested and no device is requested for the node
|
||||
TEST_F(PlacerTest, TestNoKernelsRegisteredWithNoRequstedDevice) {
|
||||
TEST_F(PlacerTest, TestNoKernelsRegisteredWithNoRequestedDevice) {
|
||||
Graph g(OpRegistry::Global());
|
||||
{ // Scope for temporary variables used to construct g.
|
||||
GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
|
||||
@ -2286,7 +2286,7 @@ TEST_F(NestedPlacerTest, OutputTwoResources_UnassignedResource) {
|
||||
* the "second pass" as they are "sources". It assigns `r1` to GPU because it
|
||||
* is in the same group as `b`. It assigns `r2` to GPU because GPU has a
|
||||
* higher device preference. Finally, `a` is assigned to GPU because `r2` is
|
||||
* on GPU - this test that the "second pass" heuristics respect colocaton
|
||||
* on GPU - this test that the "second pass" heuristics respect colocation
|
||||
* groups (even when the consumer of the source, i.e. PCO is on a different
|
||||
* device).
|
||||
*/
|
||||
@ -2494,7 +2494,7 @@ TEST_F(NestedPlacerTest, DuplicateInputResource_Conflict) {
|
||||
* r1:RESOURCE:GPU
|
||||
*
|
||||
* There is a conflict but Placer always overrides requested devices
|
||||
* when they result in coflict due to resource edges. Which device
|
||||
* when they result in conflict due to resource edges. Which device
|
||||
* is picked for a/r1/r2 is indeterministic.
|
||||
*/
|
||||
FunctionDef func = test::function::Swap();
|
||||
|
@ -444,7 +444,7 @@ class ProcessFunctionLibraryRuntime {
|
||||
std::unordered_map<string, FunctionLibraryRuntime::Handle> table_
|
||||
GUARDED_BY(mu_);
|
||||
|
||||
// Function data for instantitated remote functions.
|
||||
// Function data for instantiated remote functions.
|
||||
std::unordered_map<FunctionLibraryRuntime::Handle,
|
||||
std::unique_ptr<FunctionData>>
|
||||
function_data_ GUARDED_BY(mu_);
|
||||
|
@ -126,7 +126,7 @@ TEST_F(DebugGraphUtilsTest, TestValidDebugOpNameWithMoreThanOneAttributes) {
|
||||
ASSERT_EQ("100", attributes["first_n"]);
|
||||
}
|
||||
|
||||
TEST_F(DebugGraphUtilsTest, TestValidDebugOpNameWithMoreDuplicatettributes) {
|
||||
TEST_F(DebugGraphUtilsTest, TestValidDebugOpNameWithMoreDuplicateAttributes) {
|
||||
string debug_op_name_proper;
|
||||
std::unordered_map<string, string> attributes;
|
||||
Status s = ParseDebugOpName(
|
||||
|
@ -162,7 +162,7 @@ void EagerClusterFunctionLibraryRuntime::Run(
|
||||
remote_op->add_inputs()->Swap(&(*args)[i]);
|
||||
}
|
||||
// The remote component function should use the same op_id as its parent
|
||||
// multi-device function's in order to get the global unqiue op_id generated
|
||||
// multi-device function's in order to get the global unique op_id generated
|
||||
// by the master context.
|
||||
remote_op->set_id(opts.op_id.value());
|
||||
remote_op->set_is_function(true);
|
||||
|
@ -588,7 +588,7 @@ class RunManyGraphs {
|
||||
void ReportBadStatus(const Status& s) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
|
||||
VLOG(1) << "Master received error status " << s;
|
||||
if (!cancel_issued_ && !StatusGroup::IsDerived(s)) {
|
||||
// Only start cancelling other workers upon receiveing a non-derived
|
||||
// Only start cancelling other workers upon receiving a non-derived
|
||||
// error
|
||||
cancel_issued_ = true;
|
||||
|
||||
|
@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// GrpcMasterService implements the RPC service MasterSerivce.
|
||||
// GrpcMasterService implements the RPC service MasterService.
|
||||
//
|
||||
// A GrpcMasterService maintains the state of live graph computation
|
||||
// sessions, each session orchestrates both local and remote devices
|
||||
|
@ -24,8 +24,8 @@ const char* ToString(UntypedStreamingRPCState::Tag::TagType tag_type) {
|
||||
return "kCallStarted";
|
||||
case UntypedStreamingRPCState::Tag::TagType::kRequestWriteCompleted:
|
||||
return "kRequestWriteCompleted";
|
||||
case UntypedStreamingRPCState::Tag::TagType::kResponseReadCommpleted:
|
||||
return "kResponseReadCommpleted";
|
||||
case UntypedStreamingRPCState::Tag::TagType::kResponseReadCompleted:
|
||||
return "kResponseReadCompleted";
|
||||
case UntypedStreamingRPCState::Tag::TagType::kCallFinished:
|
||||
return "kCallFinished";
|
||||
}
|
||||
@ -43,7 +43,7 @@ void UntypedStreamingRPCState::Tag::OnCompleted(bool ok) {
|
||||
case TagType::kRequestWriteCompleted:
|
||||
streaming_state_->RequestWriteCompleted(ok);
|
||||
break;
|
||||
case TagType::kResponseReadCommpleted:
|
||||
case TagType::kResponseReadCompleted:
|
||||
streaming_state_->ResponseReadCompleted(ok);
|
||||
break;
|
||||
case TagType::kCallFinished:
|
||||
|
@ -221,7 +221,7 @@ class UntypedStreamingRPCState : public core::RefCounted {
|
||||
enum class TagType {
|
||||
kCallStarted,
|
||||
kRequestWriteCompleted,
|
||||
kResponseReadCommpleted,
|
||||
kResponseReadCompleted,
|
||||
kCallFinished,
|
||||
};
|
||||
|
||||
@ -337,7 +337,7 @@ class ExchangeQueue {
|
||||
|
||||
// Changes the state of the exchange that is current in kRequestWriteIssued
|
||||
// state to kRequestWriteCompleted state.
|
||||
// REQUIRES: There is an exhange in kRequestWriteIssued state.
|
||||
// REQUIRES: There is an exchange in kRequestWriteIssued state.
|
||||
void MarkRequestWriteCompleted();
|
||||
|
||||
// Returns the exchange at the front of the queue.
|
||||
@ -536,7 +536,7 @@ class StreamingRPCState : public UntypedStreamingRPCState {
|
||||
void MarkDoneAndCompleteExchanges(Status status) EXCLUSIVE_LOCKS_REQUIRED(mu_)
|
||||
UNLOCK_FUNCTION(mu_) {
|
||||
call_state_ = State::kDone;
|
||||
VLOG(2) << "Ending gRPC stremaing call on the client side due to "
|
||||
VLOG(2) << "Ending gRPC streaming call on the client side due to "
|
||||
<< status.ToString();
|
||||
// Swap the exchanges_ into a temporary ExchangeQueue so that we can
|
||||
// complete all exchanges without holding mu_ in case user callback
|
||||
@ -604,7 +604,7 @@ class StreamingRPCState : public UntypedStreamingRPCState {
|
||||
// Tags are immutable. No need to guard them.
|
||||
Tag call_started_tag_{this, Tag::TagType::kCallStarted};
|
||||
Tag request_write_completed_tag_{this, Tag::TagType::kRequestWriteCompleted};
|
||||
Tag response_read_completed_tag_{this, Tag::TagType::kResponseReadCommpleted};
|
||||
Tag response_read_completed_tag_{this, Tag::TagType::kResponseReadCompleted};
|
||||
Tag finished_tag_{this, Tag::TagType::kCallFinished};
|
||||
};
|
||||
|
||||
|
@ -62,7 +62,7 @@ class TestWorkerInterface : public WorkerInterface {
|
||||
}
|
||||
|
||||
void RunGraphAsync(CallOptions* opts, RunGraphRequestWrapper* request,
|
||||
MutableRunGraphResponseWrapper* repsonse,
|
||||
MutableRunGraphResponseWrapper* response,
|
||||
StatusCallback done) override {
|
||||
done(errors::Unimplemented("RunGraphAsync"));
|
||||
}
|
||||
|
@ -57,7 +57,7 @@ class WorkerInterface {
|
||||
StatusCallback done) = 0;
|
||||
|
||||
virtual void RunGraphAsync(CallOptions* opts, RunGraphRequestWrapper* request,
|
||||
MutableRunGraphResponseWrapper* repsonse,
|
||||
MutableRunGraphResponseWrapper* response,
|
||||
StatusCallback done) = 0;
|
||||
|
||||
virtual void RunGraphAsync(CallOptions* opts, const RunGraphRequest* request,
|
||||
|
@ -338,7 +338,7 @@ inline Status BroadcastBinaryOpShapeFn(InferenceContext* c) {
|
||||
// Shape function for random operations.
|
||||
Status RandomShape(shape_inference::InferenceContext* c);
|
||||
|
||||
// Shape function for Slice opertaions.
|
||||
// Shape function for Slice operations.
|
||||
Status SliceShape(shape_inference::InferenceContext* c);
|
||||
|
||||
// Validates the 3 component tensors of a sparse tensor have the proper
|
||||
|
@ -493,7 +493,7 @@ class SerializationContext {
|
||||
// `input_list`.
|
||||
bool fail_if_unimplemented = true;
|
||||
|
||||
// Indicates whether (potentionally large) data tensors should be
|
||||
// Indicates whether (potentially large) data tensors should be
|
||||
// serialized, or replaced with a placeholder returned in `input_list`. The
|
||||
// latter makes sense to do when performing data agnostic graph rewrites to
|
||||
// reduce the memory usage.
|
||||
@ -641,7 +641,7 @@ class IteratorBase {
|
||||
return errors::Unimplemented("RestoreInternal");
|
||||
}
|
||||
|
||||
// Returns the number of elements produced by this itertaor.
|
||||
// Returns the number of elements produced by this iterator.
|
||||
int64 num_elements() const {
|
||||
if (node_) return node_->num_elements();
|
||||
return 0;
|
||||
|
@ -62,7 +62,7 @@ class WhitelistedStatefulOpRegistry {
|
||||
// Note that the state of the whitelisted ops inside functions will not be
|
||||
// saved during checkpointing, hence this should only be used if the op is
|
||||
// marked stateful for reasons like to avoid constant folding during graph
|
||||
// optimiztion but is not stateful.
|
||||
// optimization but is not stateful.
|
||||
// If possible, try to remove the stateful flag on the op first.
|
||||
// Example usage:
|
||||
//
|
||||
|
@ -665,7 +665,7 @@ class FunctionLibraryRuntime {
|
||||
// "handle".
|
||||
//
|
||||
// If function execution succeeds, "done" is called with OK and
|
||||
// "*rets" is filled with the function's return values. Otheriwse,
|
||||
// "*rets" is filled with the function's return values. Otherwise,
|
||||
// "done" is called with an error status.
|
||||
//
|
||||
// Does not take ownership of "rets".
|
||||
|
@ -786,7 +786,7 @@ std::map<string, std::shared_ptr<Parameter>> Model::CollectTunableParameters(
|
||||
|
||||
std::map<string, std::shared_ptr<Parameter>> Model::CollectEssentialParallelism(
|
||||
std::shared_ptr<Node> node) {
|
||||
// Parallelism parameter is considered to be essential if the coressponding
|
||||
// Parallelism parameter is considered to be essential if the corresponding
|
||||
// transformations's processing time is greater than essential rate times the
|
||||
// average transformation self processing time.
|
||||
constexpr double kEssentialRate = 0.3L;
|
||||
|
@ -158,7 +158,7 @@ class AttrSlice {
|
||||
// account.
|
||||
//
|
||||
// TODO(irving): There is a bug in this routine inherited from its
|
||||
// OptimizerCSE::EqualAttrs precedecessor. The same tensor attr can be
|
||||
// OptimizerCSE::EqualAttrs predecessor. The same tensor attr can be
|
||||
// represented in more than one way as an AttrValue, since TensorProto is
|
||||
// not 1-1. This bug will go away once I replace everything with NodeInfo,
|
||||
// which stores a Tensor object directly. The Scratch object will also go
|
||||
|
@ -205,7 +205,7 @@ class ThreadWorkSource {
|
||||
// thread pool waiting queues. Wake up threads from sub thread waiting
|
||||
// queues.
|
||||
// The waiting queues are defined at RunHandlerPool.
|
||||
// Get the waiter_queue and coresponding mutex. Note, the thread work
|
||||
// Get the waiter_queue and corresponding mutex. Note, the thread work
|
||||
// source may change afterwards if a new request comes or an old request
|
||||
// finishes.
|
||||
tf_shared_lock lock(run_handler_waiter_mu_);
|
||||
@ -522,7 +522,7 @@ class RunHandlerThreadPool {
|
||||
|
||||
// Search tasks from Requets range searching_range_start to
|
||||
// searching_range_end. If there is no tasks in the search range and
|
||||
// may_steal_blocking_work is true, then search from all reuqests.
|
||||
// may_steal_blocking_work is true, then search from all requests.
|
||||
Task FindTask(
|
||||
int searching_range_start, int searching_range_end, int thread_id,
|
||||
int sub_thread_pool_id, int max_blocking_inflight,
|
||||
@ -596,7 +596,7 @@ Task RunHandlerThreadPool::FindTask(
|
||||
int current_index = thread_data_[thread_id].current_index;
|
||||
*task_from_blocking_queue = false;
|
||||
|
||||
// TODO(chaox): Chagne the search algorithm from round robin to random
|
||||
// TODO(chaox): Change the search algorithm from round robin to random
|
||||
// walk.
|
||||
for (int i = 0; i < searching_range_end - searching_range_start; ++i) {
|
||||
if (current_index >= searching_range_end) {
|
||||
|
@ -36,7 +36,7 @@ class SessionState {
|
||||
// Store a tensor in the session state.
|
||||
Status AddTensor(const string& handle, const Tensor& tensor);
|
||||
|
||||
// Delete a tensdor from the session state.
|
||||
// Delete a tensor from the session state.
|
||||
Status DeleteTensor(const string& handle);
|
||||
|
||||
int64 GetNewId();
|
||||
|
@ -36,7 +36,7 @@ namespace data {
|
||||
//
|
||||
// NOTE(mrry): `StatsAggregator` is a virtual interface because we anticipate
|
||||
// that many different implementations will have the same interface. For
|
||||
// example, we have diffferent implementations in "stats_aggregator_ops.cc" for
|
||||
// example, we have different implementations in "stats_aggregator_ops.cc" for
|
||||
// simple in-memory implementation that integrates with the pull-based summary
|
||||
// API, and for the push-based `SummaryWriterInterface`, and we may add
|
||||
// implementations that work well with other custom monitoring services.
|
||||
@ -50,7 +50,7 @@ class StatsAggregator {
|
||||
gtl::ArraySlice<double> values,
|
||||
int64 global_step) = 0;
|
||||
|
||||
// TODO(shivaniagarawal): consistency in double and float usage.
|
||||
// TODO(shivaniagrawal): consistency in double and float usage.
|
||||
// Add the given `value` as Scalar with the given `name`.
|
||||
virtual void AddScalar(const string& name, float value,
|
||||
int64 global_step) = 0;
|
||||
@ -67,7 +67,7 @@ class StatsAggregator {
|
||||
int64 val) = 0;
|
||||
};
|
||||
|
||||
// A `StatsAggregatorResource` wraps a shareable `StatsAggregator` as a resource
|
||||
// A `StatsAggregatorResource` wraps a sharable `StatsAggregator` as a resource
|
||||
// in the TensorFlow resource manager.
|
||||
//
|
||||
// NOTE(mrry): This class is separate from `StatsAggregator` in order to
|
||||
|
@ -332,7 +332,7 @@ Status BinaryOpVariants(OpKernelContext* ctx, VariantBinaryOp op,
|
||||
const Variant& a, const Variant& b, Variant* out) {
|
||||
if (a.TypeId() != b.TypeId()) {
|
||||
return errors::Internal(
|
||||
"BianryOpVariants: Variants a and b have different "
|
||||
"BinaryOpVariants: Variants a and b have different "
|
||||
"type ids. Type names: '",
|
||||
a.TypeName(), "' vs. '", b.TypeName(), "'");
|
||||
}
|
||||
|
@ -167,12 +167,12 @@ class SymbolicGradientBuilder {
|
||||
|
||||
// A vector of output endpoints which represents backpropagated
|
||||
// gradients
|
||||
typedef std::vector<NodeOut> BackpropedGradients;
|
||||
typedef std::vector<NodeOut> BackproppedGradients;
|
||||
|
||||
// backprops_ is a map from a node output to its accumulated
|
||||
// gradients. When a node output has accumulated all its
|
||||
// gradients, we add a node which sums them up.
|
||||
std::unordered_map<NodeOut, BackpropedGradients, NodeOutHash, NodeOutEq>
|
||||
std::unordered_map<NodeOut, BackproppedGradients, NodeOutHash, NodeOutEq>
|
||||
backprops_;
|
||||
|
||||
// pending[i] is count-down counter for i-th node's expected
|
||||
|
@ -137,7 +137,7 @@ class GraphConstructor {
|
||||
// remove this.
|
||||
bool importing;
|
||||
// If true, validates that nodes being converted have all expected attrs
|
||||
// set and no unknonw attrs set by calling ValidateNodeDef().
|
||||
// set and no unknown attrs set by calling ValidateNodeDef().
|
||||
// `validate_nodes` is always true when `importing` is set.
|
||||
bool validate_nodes;
|
||||
bool validate_colocation_constraints;
|
||||
|
@ -44,7 +44,7 @@ struct GraphConstructorOptions {
|
||||
bool expect_device_spec = false;
|
||||
|
||||
// If true, validates that nodes being converted have all expected attrs
|
||||
// set and no unknonw attrs set by calling ValidateNodeDef().
|
||||
// set and no unknown attrs set by calling ValidateNodeDef().
|
||||
// Setting validate_nodes without add_default_attributes, will fail if
|
||||
// the GraphDef does not have all required attributes set.
|
||||
bool validate_nodes = false;
|
||||
|
@ -3196,7 +3196,7 @@ versions {
|
||||
EXPECT_EQ(17, refiner.graph_def_version());
|
||||
}
|
||||
|
||||
TEST_F(GraphConstructorTest, ImportGraphDef_ValidateColationConstraints) {
|
||||
TEST_F(GraphConstructorTest, ImportGraphDef_ValidateColocationConstraints) {
|
||||
GraphDef def;
|
||||
ASSERT_TRUE(protobuf::TextFormat::ParseFromString(
|
||||
"node { name: 'A' op: 'TestInput' attr { key: '_class' value { list { "
|
||||
|
@ -74,7 +74,7 @@ int inline GetTensorMetaDataIndex(int n, int total_tensors) {
|
||||
return DataIndexToMetaDataIndex(tidx, total_tensors);
|
||||
}
|
||||
|
||||
// check if the control between src and dst nodes alreay exists
|
||||
// check if the control between src and dst nodes already exists
|
||||
bool inline DoesControlEdgeExist(const Node* src, const Node* dst) {
|
||||
for (const Edge* edge : src->out_edges()) {
|
||||
if (edge->IsControlEdge() && edge->dst() == dst) {
|
||||
|
@ -3639,7 +3639,7 @@ const MklLayoutRewritePass::RewriteInfo*
|
||||
MklLayoutRewritePass::CheckForNodeRewrite(const Node* n) const {
|
||||
CHECK_NOTNULL(n);
|
||||
|
||||
// QuntizedOps may have attributes other than "T", so decoupled the check
|
||||
// QuantizedOps may have attributes other than "T", so decoupled the check
|
||||
// with a function, CheckForQuantizedNodeRewrite(const Node*).
|
||||
const RewriteInfo* ri = CheckForQuantizedNodeRewrite(n);
|
||||
if (ri != nullptr) return ri;
|
||||
@ -3789,7 +3789,7 @@ MklLayoutRewritePass::CheckForNodeFusion(Node* a) const {
|
||||
// a.k.a. "a->b->c" matches "op1->op2->op3"
|
||||
//
|
||||
|
||||
// Stores the first unvisted outgoing edge of each matched node in "nodes".
|
||||
// Stores the first unvisited outgoing edge of each matched node in "nodes".
|
||||
std::stack<EdgeSet::const_iterator> current_neighbor_stack;
|
||||
nodes.clear();
|
||||
|
||||
|
@ -613,7 +613,7 @@ TEST_F(SingleMachineTest, PeakMemoryStatsNotEnabled) {
|
||||
|
||||
TF_CHECK_OK(cluster_->Shutdown());
|
||||
cluster_.reset();
|
||||
SingleMachine cluster(60 /* timout_s */, 3 /* num_cpu_cores */,
|
||||
SingleMachine cluster(60 /* timeout_s */, 3 /* num_cpu_cores */,
|
||||
0 /* num_gpus */);
|
||||
|
||||
TF_CHECK_OK(cluster.Provision());
|
||||
|
@ -524,7 +524,7 @@ bool IsWhiteListedOpTypeForEvaluateNode(const string& op_type) {
|
||||
"TruncateDiv",
|
||||
"TruncateMod",
|
||||
"RealDiv",
|
||||
// N-ary arithemtic ops
|
||||
// N-ary arithmetic ops
|
||||
"AddN",
|
||||
// Others
|
||||
"StridedSlice",
|
||||
@ -1313,7 +1313,7 @@ class SymbolicShapeRefiner {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Create input tensors from the NodeConext.
|
||||
// Create input tensors from the NodeContext.
|
||||
void CreateInputTensors(NodeContext* c,
|
||||
std::vector<Tensor>* input_tensor_vector,
|
||||
TensorVector* inputs) {
|
||||
@ -1362,7 +1362,7 @@ class SymbolicShapeRefiner {
|
||||
|
||||
// Input to EvaluateNode()
|
||||
TensorVector inputs;
|
||||
// Container for temporaily created tensor object.
|
||||
// Container for temporarily created tensor object.
|
||||
std::vector<Tensor> input_tensor_vector(ic->num_inputs());
|
||||
CreateInputTensors(c, &input_tensor_vector, &inputs);
|
||||
|
||||
|
@ -879,7 +879,7 @@ TEST_F(GraphPropertiesTest, InferRestoreOpShape) {
|
||||
EXPECT_EQ(DT_FLOAT, restorev2_prop.dtype());
|
||||
EXPECT_EQ("float: [128,256]", PropToString(restorev2_prop));
|
||||
|
||||
// Check input shapes of assign op are propagted correctly.
|
||||
// Check input shapes of assign op are propagated correctly.
|
||||
const auto input_props = properties.GetInputProperties("init_restore");
|
||||
ASSERT_EQ(2, input_props.size());
|
||||
const OpInfo::TensorProperties& input_prop = input_props[1];
|
||||
@ -995,7 +995,7 @@ TEST_F(GraphPropertiesTest, SkippingValueInferenceForLargeTensors) {
|
||||
// When using aggressive_shape_inference, we run EvaluateNode() for
|
||||
// whitelisted ops and small input / output tensors. For instance, Fill op is
|
||||
// evaluated and produces output tensor value if output tensor size is smal
|
||||
// (currently, fewer than 17 elements); otherwise we don't run EvalauteNode().
|
||||
// (currently, fewer than 17 elements); otherwise we don't run EvaluateNode().
|
||||
// This is to avoid wasting time and memory for producing huge tensors (e.g.,
|
||||
// initializing a large table using Fill.
|
||||
{
|
||||
|
@ -868,7 +868,7 @@ int64 OpLevelCostEstimator::CountMatMulOperations(const OpInfo& op_info,
|
||||
LOG(ERROR) << "Incompatible Matrix dimensions";
|
||||
return ops;
|
||||
} else {
|
||||
// One of k_dim and k_dim_b might be 1 (mininum dimension size).
|
||||
// One of k_dim and k_dim_b might be 1 (minimum dimension size).
|
||||
k_dim = std::max(k_dim, k_dim_b);
|
||||
}
|
||||
|
||||
|
@ -200,7 +200,7 @@ class OpLevelCostEstimator {
|
||||
typedef std::function<Costs(const OpContext& op_context)> CostImpl;
|
||||
std::map<string, CostImpl> device_cost_impl_;
|
||||
// If true, assume compute and memory overlap; hence, the op cost is max of
|
||||
// compute_time and memory_time, insteaf of sum of those two.
|
||||
// compute_time and memory_time, instead of sum of those two.
|
||||
bool compute_memory_overlap_;
|
||||
std::set<string> persistent_ops_;
|
||||
|
||||
|
@ -521,7 +521,7 @@ class OpLevelCostEstimatorTest : public ::testing::Test {
|
||||
estimator_.compute_memory_overlap_ = value;
|
||||
}
|
||||
|
||||
void ValidateOpDimensionsFromImputs(const int n, const int h, const int w,
|
||||
void ValidateOpDimensionsFromInputs(const int n, const int h, const int w,
|
||||
const int c, const int kx, const int ky,
|
||||
const int sx, const int sy,
|
||||
const string& data_format,
|
||||
@ -571,12 +571,12 @@ class OpLevelCostEstimatorTest : public ::testing::Test {
|
||||
TEST_F(OpLevelCostEstimatorTest, TestPersistentOpCosts) {
|
||||
OpContext op_context;
|
||||
SetCpuDevice(&op_context.op_info);
|
||||
std::unordered_set<string> persisent_ops = {
|
||||
std::unordered_set<string> persistent_ops = {
|
||||
"Const", "Variable", "VariableV2", "AutoReloadVariable",
|
||||
"VarHandleOp", "ReadVariableOp",
|
||||
};
|
||||
// Minmum cost for all persistent ops.
|
||||
for (const auto& op : persisent_ops) {
|
||||
// Minimum cost for all persistent ops.
|
||||
for (const auto& op : persistent_ops) {
|
||||
op_context.op_info.set_op(op);
|
||||
auto cost = estimator_.PredictCosts(op_context);
|
||||
EXPECT_EQ(Costs::Duration(0), cost.memory_time);
|
||||
@ -1128,10 +1128,10 @@ TEST_F(OpLevelCostEstimatorTest, OpDimensionsFromInputs) {
|
||||
for (const auto& p : paddings) {
|
||||
for (const auto& f : formats) {
|
||||
// n, h, w, c, kx, ky, sx, sy, data_format, padding.
|
||||
ValidateOpDimensionsFromImputs(10, 20, 20, 100, 3, 3, 2, 2, f, p);
|
||||
ValidateOpDimensionsFromImputs(10, 20, 20, 100, 1, 1, 3, 3, f, p);
|
||||
ValidateOpDimensionsFromImputs(10, 200, 200, 100, 5, 5, 3, 3, f, p);
|
||||
ValidateOpDimensionsFromImputs(10, 14, 14, 3840, 3, 3, 2, 2, f, p);
|
||||
ValidateOpDimensionsFromInputs(10, 20, 20, 100, 3, 3, 2, 2, f, p);
|
||||
ValidateOpDimensionsFromInputs(10, 20, 20, 100, 1, 1, 3, 3, f, p);
|
||||
ValidateOpDimensionsFromInputs(10, 200, 200, 100, 5, 5, 3, 3, f, p);
|
||||
ValidateOpDimensionsFromInputs(10, 14, 14, 3840, 3, 3, 2, 2, f, p);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -262,7 +262,7 @@ const NodeDef* CompositeNodeManager::GetCurrNode() {
|
||||
// FirstReady for _Send and _Recv (separately),
|
||||
// Globally (among the LIFO-selected ops from each device and _Send and
|
||||
// _Recv) FirstReady,
|
||||
// Priorty order: _Send, _Recv, and then the rest, if time_ready is equal.
|
||||
// Priority order: _Send, _Recv, and then the rest, if time_ready is equal.
|
||||
std::vector<std::pair<const NodeDef*, Costs::Duration>> candidates;
|
||||
for (auto& ops_lifo : ops_lifo_map_) {
|
||||
if (!ops_lifo.second.Empty()) {
|
||||
@ -294,7 +294,7 @@ const NodeDef* CompositeNodeManager::GetCurrNode() {
|
||||
// Both are normal ops; use node name as tie breaker.
|
||||
return a.first->name().compare(b.first->name()) < 0;
|
||||
} else {
|
||||
// Priortize by op type: _Send, _Recv, and normap ops.
|
||||
// Prioritize by op type: _Send, _Recv, and normap ops.
|
||||
return a_score > b_score;
|
||||
}
|
||||
} else {
|
||||
@ -989,7 +989,7 @@ Costs VirtualScheduler::Summary() const {
|
||||
std::map<string, int64> op_to_memory;
|
||||
// First profile only persistent memory usage.
|
||||
int64 persistent_memory_usage = 0;
|
||||
std::set<string> persisent_ops;
|
||||
std::set<string> persistent_ops;
|
||||
for (const auto& node_port : state.persistent_nodes) {
|
||||
const auto* node = node_port.first;
|
||||
const auto port = node_port.second;
|
||||
@ -997,7 +997,7 @@ Costs VirtualScheduler::Summary() const {
|
||||
CalculateOutputSize(node_map_.at(node).output_properties, port);
|
||||
persistent_memory_usage += output_size;
|
||||
op_to_memory[node->op()] += output_size;
|
||||
persisent_ops.insert(node->op());
|
||||
persistent_ops.insert(node->op());
|
||||
}
|
||||
int64 max_memory_usage = persistent_memory_usage + state.max_memory_usage;
|
||||
critical_path_costs.estimated_max_memory_per_device[name] =
|
||||
@ -1076,7 +1076,7 @@ Costs VirtualScheduler::Summary() const {
|
||||
memory_cost, intermediate_memory_cost)
|
||||
<< " (" << HumanReadableNumBytes(op_mem_usage) << " ["
|
||||
<< mem_usage_percent << "%] "
|
||||
<< (persisent_ops.count(op) > 0 ? ": persistent op)" : ")");
|
||||
<< (persistent_ops.count(op) > 0 ? ": persistent op)" : ")");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -415,7 +415,7 @@ TEST_F(ReadyNodeManagerTest, RemoveSingleNodeCompositeNodeManager) {
|
||||
EXPECT_TRUE(manager.Empty());
|
||||
}
|
||||
|
||||
TEST_F(ReadyNodeManagerTest, GetAndRemoveMultipleComopsiteNodeManager) {
|
||||
TEST_F(ReadyNodeManagerTest, GetAndRemoveMultipleCompositeNodeManager) {
|
||||
CompositeNodeManager manager;
|
||||
TF_EXPECT_OK(manager.Init(&node_states_));
|
||||
manager.AddNode(&node1_);
|
||||
@ -445,7 +445,7 @@ TEST_F(ReadyNodeManagerTest, GetAndRemoveMultipleComopsiteNodeManager) {
|
||||
EXPECT_TRUE(manager.Empty());
|
||||
}
|
||||
|
||||
TEST_F(ReadyNodeManagerTest, MultiDeviceSendRecvComopsiteNodeManager) {
|
||||
TEST_F(ReadyNodeManagerTest, MultiDeviceSendRecvCompositeNodeManager) {
|
||||
CompositeNodeManager manager;
|
||||
TF_EXPECT_OK(manager.Init(&node_states_));
|
||||
// Additional nodes on kCPU1.
|
||||
@ -2966,7 +2966,7 @@ TEST_F(VirtualSchedulerTest, GraphWihtOnlyRecv) {
|
||||
}
|
||||
|
||||
TEST_F(VirtualSchedulerTest, AddMergeSwitch) {
|
||||
// Override scheduler_ with CompositeNodeNamager.
|
||||
// Override scheduler_ with CompositeNodeManager.
|
||||
scheduler_ = absl::make_unique<TestVirtualScheduler>(
|
||||
/*use_static_shapes=*/true,
|
||||
/*use_aggressive_shape_inference=*/true, &composite_node_manager_,
|
||||
|
@ -89,7 +89,7 @@ void SigNode::ComputeTopoHash0() {
|
||||
topo_hash_.clear();
|
||||
last_hashed_nodes_ = next_hashed_nodes_ = node_mask_;
|
||||
|
||||
// TODO(babkin): include the attrbutes too, as an option.
|
||||
// TODO(babkin): include the attributes too, as an option.
|
||||
size_t hval = std::hash<string>()(opcode());
|
||||
|
||||
// Getting the topology of the links in to the hash early should get more
|
||||
|
@ -195,7 +195,7 @@ class MutableGraphView : public internal::GraphViewInternal<GraphDef, NodeDef> {
|
||||
|
||||
// Removes control dependency `fanin_node_name` from the target node named
|
||||
// `node_name`. If the node or fanin do not exist in the graph, nothing will
|
||||
// be modified in the graph. To remove regular fanins, use RemoveRegualrFanin.
|
||||
// be modified in the graph. To remove regular fanins, use RemoveRegularFanin.
|
||||
//
|
||||
// If the fanin being removed doesn't exist in the node's inputs, this will
|
||||
// not result in an error and the node will not be modified.
|
||||
|
@ -1947,7 +1947,7 @@ TEST(MutableGraphViewTest, SwapRegularFaninsByPorts) {
|
||||
/*to_port=*/2, /*success=*/true, error_msg,
|
||||
{"a", "b:2", "b:2", "^c", "^d"});
|
||||
|
||||
// Swaping fanins at out of bounds ports.
|
||||
// Swapping fanins at out of bounds ports.
|
||||
// Node with no regular fanins and no controls.
|
||||
error_msg =
|
||||
"MutableGraphView::SwapRegularFaninsByPorts(node_name='foo_5', "
|
||||
|
@ -1044,7 +1044,7 @@ class MinimizeBroadcasts : public ArithmeticNodesGroupOptimizerStage {
|
||||
// short tree, pushing to the front will create a tall tree. We prefer to
|
||||
// get a wide tree, it minimizes the potential number of temporary tensors
|
||||
// required to keep in memory, though sometimes we can go up to prevent
|
||||
// propagating a brodcast from leaves to the root. Example:
|
||||
// propagating a broadcast from leaves to the root. Example:
|
||||
//
|
||||
// inputs: [s, s, s, M] (s - scalar, M - matrix)
|
||||
// @* - op with broadcast
|
||||
@ -1466,7 +1466,7 @@ class HoistCWiseUnaryChainsStage : public ArithmeticOptimizerStage {
|
||||
}
|
||||
if (NumControlOutputs(*node, *ctx().node_map) > 0) {
|
||||
// TODO(ezhulenev): Unary ops after Split might have a control path to
|
||||
// the Split node, and we currently do not propertly handle cycles.
|
||||
// the Split node, and we currently do not properly handle cycles.
|
||||
return false;
|
||||
}
|
||||
return num_split > 1 && !IsAlreadyOptimized(*node);
|
||||
@ -1509,7 +1509,7 @@ class HoistCWiseUnaryChainsStage : public ArithmeticOptimizerStage {
|
||||
// Follow the chains starting at each concat input or split output as long
|
||||
// as all the following conditions hold:
|
||||
// 1. The ops in all chains are the same.
|
||||
// 2. The ops are unary elemenwise op.
|
||||
// 2. The ops are unary elementwise op.
|
||||
// 3. The op output has only a single consumer (concat only).
|
||||
ChainLinkSet cur_tails;
|
||||
TF_RETURN_IF_ERROR(InitializeChains(root_node, &cur_tails));
|
||||
|
@ -1781,7 +1781,7 @@ void ConstantFolding::ReplaceOperationWithSnapshot(
|
||||
int input_to_forward, const GraphProperties& properties, NodeDef* node,
|
||||
GraphDef* graph) {
|
||||
// If the graph contains no ops that mutate their inputs, we can
|
||||
// use Identity insted of Snapshot.
|
||||
// use Identity instead of Snapshot.
|
||||
if (!graph_contains_assign_or_inplace_op_) {
|
||||
ReplaceOperationWithIdentity(input_to_forward, properties, node, graph);
|
||||
return;
|
||||
@ -3138,7 +3138,7 @@ bool ConstantFolding::ConstantPushDown(GraphProperties* properties,
|
||||
ctx.left_child_is_const ? node->input(1) : node->input(0);
|
||||
VLOG(1) << "input_c = " << input_c << "\ninput_x = " << input_x;
|
||||
|
||||
// Now we have identified the nodes to swap, updare the nodemap accordingly.
|
||||
// Now we have identified the nodes to swap, update the nodemap accordingly.
|
||||
node_map_->UpdateInput(node->name(), input_c, input_x);
|
||||
node_map_->AddOutput(input_c, ctx.op_child->name());
|
||||
if (input_x != input_y) {
|
||||
|
@ -2658,7 +2658,7 @@ TEST_F(ConstantFoldingTest, PaddingWithZeroSize) {
|
||||
test::ExpectTensorEqual<int>(tensors_expected[0], tensors[0]);
|
||||
}
|
||||
|
||||
TEST_F(ConstantFoldingTest, SqueezeWithAllDimesionsGreaterThanOne) {
|
||||
TEST_F(ConstantFoldingTest, SqueezeWithAllDimensionsGreaterThanOne) {
|
||||
tensorflow::Scope scope = tensorflow::Scope::NewRootScope();
|
||||
|
||||
auto in1 = ops::Variable(scope.WithOpName("in1"), {2, 3}, DT_INT32);
|
||||
@ -3379,7 +3379,7 @@ TEST_F(ConstantFoldingTest, SwitchIdenticalInputs) {
|
||||
EXPECT_EQ(1, tensors.size());
|
||||
test::ExpectTensorEqual<bool>(tensors_expected[0], tensors[0]);
|
||||
|
||||
// Evalute id_false when input tensor is false.
|
||||
// Evaluate id_false when input tensor is false.
|
||||
x_t.flat<bool>()(0) = false;
|
||||
tensors_expected = EvaluateNodes(item.graph, {"id_false"}, {{"x", x_t}});
|
||||
EXPECT_EQ(1, tensors_expected.size());
|
||||
@ -3689,8 +3689,8 @@ TEST_F(ConstantFoldingTest, TrivialPack) {
|
||||
EXPECT_EQ(tensors_expected[1].shape(), tensors[1].shape());
|
||||
}
|
||||
|
||||
// The test does not evalute the optimized and original graphs to check if their
|
||||
// outputs are the same. See b/78233179.
|
||||
// The test does not evalaute the optimized and original graphs to check if
|
||||
// their outputs are the same. See b/78233179.
|
||||
TEST_F(ConstantFoldingTest, Enter) {
|
||||
GrapplerItem item;
|
||||
AttrValue frame_name;
|
||||
|
@ -40,7 +40,7 @@ TEST(LatencyAllEdgesTest, AddLatenciesAfterTensorMapPrefetch) {
|
||||
NodeDef map_node = NDef("map_node", "MapDataset",
|
||||
{"from_tensor_node", "captured_input_node"},
|
||||
{{"f", {}},
|
||||
{"Targumemts", {}},
|
||||
{"Targuments", {}},
|
||||
{"output_shapes", {}},
|
||||
{"output_types", {}}});
|
||||
NodeDef buffer_size_node = NDef("buffer_size_node", "Const", {},
|
||||
|
@ -1177,7 +1177,7 @@ Status InlineFunctionCalls(const GrapplerItem& item,
|
||||
GraphDef* output_graph) {
|
||||
bool is_aggressive = opt_level == RewriterConfig::AGGRESSIVE;
|
||||
VLOG(2) << "Inline function calls: grappler_item_id=" << item.id
|
||||
<< " (aggessive_mode=" << is_aggressive << ")";
|
||||
<< " (aggressive_mode=" << is_aggressive << ")";
|
||||
|
||||
FunctionLibraryDefinition flib_def =
|
||||
FunctionLibraryDefinition(OpRegistry::Global(), item.graph.library());
|
||||
|
@ -735,7 +735,7 @@ class NodeProcessor : public GraphProcessor {
|
||||
if (IsConstant(*param_node)) {
|
||||
TF_RETURN_IF_ERROR(UpdateAttrValueOfInput(param_index, permute));
|
||||
} else {
|
||||
AddDataFormatTranformToParamInput(op, param_index, dtype);
|
||||
AddDataFormatTransformToParamInput(op, param_index, dtype);
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
@ -1038,8 +1038,8 @@ class NodeProcessor : public GraphProcessor {
|
||||
return added_node;
|
||||
}
|
||||
|
||||
void AddDataFormatTranformToParamInput(const string& op, int input_pos,
|
||||
DataType dtype) {
|
||||
void AddDataFormatTransformToParamInput(const string& op, int input_pos,
|
||||
DataType dtype) {
|
||||
string suffix = (op == "DataFormatVecPermute") ? kVecPermuteNHWCToNCHW
|
||||
: kDimMapNHWCToNCHW;
|
||||
string name = LayoutOptimizerNode(
|
||||
|
@ -1147,7 +1147,7 @@ bool SwappingPass(RewriterConfig::MemOptType optimization_level,
|
||||
// Use heuristics to figure out what needs to be swapped;
|
||||
IdentifySwappingCandidates(cluster, item, skip_list, &nodes_to_swap);
|
||||
}
|
||||
// Look for manual annotatations in the graph.
|
||||
// Look for manual annotations in the graph.
|
||||
for (auto& node : *item->graph.mutable_node()) {
|
||||
if (node.attr().count("_swap_to_host") != 0) {
|
||||
SwapInfo& swap_info = nodes_to_swap[&node];
|
||||
|
@ -520,7 +520,7 @@ TEST_F(RelaxAllocatorConstraintsTest, SameDeviceType) {
|
||||
{128, 128}, DT_FLOAT);
|
||||
Output assign = ops::Assign(s.WithOpName("assign").WithDevice("/cpu:0"),
|
||||
variable, constant);
|
||||
// Assign and Exp run on different devies, but do not straddle a CPU:GPU
|
||||
// Assign and Exp run on different devices, but do not straddle a CPU:GPU
|
||||
// boundary, so we can we do not need to enforce allocation in pinned memory.
|
||||
Output exp = ops::Exp(s.WithOpName("exp").WithDevice("/cpu:1"), assign);
|
||||
|
||||
|
@ -737,7 +737,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
|
||||
// TPU metadata and Grappler passes could prune that away. Grappler
|
||||
// passes could also cause issues around shape inference. Since the
|
||||
// desired and existing behavior is to not optimize TPU functions with
|
||||
// Grappler, this check preserves that. The only execption is
|
||||
// Grappler, this check preserves that. The only exception is
|
||||
// implementation selector what is required to swap in some TPU specific
|
||||
// lowering code and is verified the work correctly on TPUs.
|
||||
ImplementationSelector implementation_selector;
|
||||
|
@ -69,7 +69,7 @@ class MetaOptimizer : public GraphOptimizer {
|
||||
const RewriterConfig::CustomGraphOptimizer* GetCustomGraphOptimizerConfig(
|
||||
const string& name) const;
|
||||
|
||||
// Initialiaze active verifiers from the RewriterConfig toggles.
|
||||
// Initialize active verifiers from the RewriterConfig toggles.
|
||||
void InitializeVerifiers(
|
||||
std::vector<std::unique_ptr<GraphVerifier>>* inter_optimizer_verifiers,
|
||||
std::vector<std::unique_ptr<GraphVerifier>>* post_optimization_verifiers)
|
||||
|
@ -299,7 +299,7 @@ Status RewriteIdentityNAndInputsOutputs(
|
||||
// Rewrite IdentityN node and associated inputs and outputs. For inputs and
|
||||
// outputs that don't lead to a terminal node, a new Identity node is created
|
||||
// and those inputs and outputs are rewritten to use the new Identity node as
|
||||
// their outputs and inputs respectively. For the remaining nodes, the ouputs
|
||||
// their outputs and inputs respectively. For the remaining nodes, the outputs
|
||||
// have their inputs updated with the adjusted port, from the IdentityN node
|
||||
// having less inputs.
|
||||
struct NodeOutputUpdate {
|
||||
|
@ -406,7 +406,7 @@ TEST_F(ModelPrunerTest, PruningForwardsCtrlDependencies) {
|
||||
}
|
||||
*/
|
||||
|
||||
TEST_F(ModelPrunerTest, PruningPerservesFetch) {
|
||||
TEST_F(ModelPrunerTest, PruningPreservesFetch) {
|
||||
// Build a simple graph with a few trivially prunable ops.
|
||||
GrapplerItem item;
|
||||
{
|
||||
@ -445,7 +445,7 @@ TEST_F(ModelPrunerTest, PruningPerservesFetch) {
|
||||
test::ExpectTensorEqual<float>(actual_tensors[0], expected_tensors[0]);
|
||||
}
|
||||
|
||||
TEST_F(ModelPrunerTest, PruningPerservesCrossDeviceIdentity) {
|
||||
TEST_F(ModelPrunerTest, PruningPreservesCrossDeviceIdentity) {
|
||||
GrapplerItem item;
|
||||
{
|
||||
tensorflow::Scope s = tensorflow::Scope::NewRootScope();
|
||||
|
@ -119,7 +119,7 @@ Status IsNodeOutputPortHostFriendly(const GraphView& graph,
|
||||
}
|
||||
|
||||
// These nodes may be optimized away downstream (even if pinned to Host), we
|
||||
// should (recusively) check their source.
|
||||
// should (recursively) check their source.
|
||||
if (IsIdentity(node) || IsIdentityNSingleInput(node)) {
|
||||
for (const auto& fanin : graph.GetFanins(node, false)) {
|
||||
bool fanin_candidate = false;
|
||||
|
@ -389,7 +389,7 @@ class UnaryElementwiseRewriter : public ScopedAllocatorOptimizer::Rewriter {
|
||||
for (const InputDesc& nd : inputs) {
|
||||
if (op_set.find(nd.from_node_def->name()) != op_set.end()) {
|
||||
if (nd.output_slot != tensorflow::Graph::kControlSlot) {
|
||||
return errors::Internal("Data edge exists bewtween ",
|
||||
return errors::Internal("Data edge exists between ",
|
||||
nd.from_node_def->name(),
|
||||
" and another "
|
||||
"node in the set");
|
||||
|
@ -250,7 +250,7 @@ class ScopedAllocatorOptimizerTest : public ::testing::Test {
|
||||
}
|
||||
|
||||
// Invokes ScopedAllocatorOptimizer on `graph_def`, then executes it and
|
||||
// returns the outputs specifed by `output_names` in `outputs`.
|
||||
// returns the outputs specified by `output_names` in `outputs`.
|
||||
void ExecuteGraph(const GraphDef& graph_def,
|
||||
const std::vector<string>& output_names,
|
||||
std::vector<Tensor>* outputs) {
|
||||
|
@ -982,7 +982,7 @@ void MutableGraphView::FixRenamedFanouts(
|
||||
// leftover fanouts, mark their respective fanin fanout_index_ to
|
||||
// internal::kMissingIndex as an indicator so when it comes to updating or
|
||||
// removing fanins inplace, nodes with the same index don't get affected and
|
||||
// other fanouts are accidently removed.
|
||||
// other fanouts are accidentally removed.
|
||||
for (auto& renamed_fanout : renamed_fanouts) {
|
||||
for (auto& regular_fanouts :
|
||||
renamed_fanout.second.regular_fanouts_by_port_) {
|
||||
|
@ -55,13 +55,13 @@ class GrapplerTest : public ::testing::Test {
// with the same inputs and attributes. Nodes can be in different order.
//
// NOTE: This function uses EXPECT/ASSERT macros to check node properties
// equality, and adds all failuires to the current test.
// equality, and adds all failures to the current test.
void CompareGraphs(GraphDef want, GraphDef got) const;

// Checks if two nodes have the same name, op, inputs and attributes.
//
// NOTE: This function uses EXPECT/ASSERT macros to check node properties
// equality, and adds all failuires to the current test.
// equality, and adds all failures to the current test.
void CompareNodes(const NodeDef& want, const NodeDef& got) const;

// Checks if two functions are equal. Both functions must have the same set of

@ -273,7 +273,7 @@ class AdjustContrastOpv2<CPUDevice, float> : public AdjustContrastOpV2Base {
//
// The algorithm itself can handle size that is not power-of-two. Note
// that in each round we sum up elements that are contiguous. So we can
// use their flattened structure to gain vectorinization efficiency.
// use their flattened structure to gain vectorization efficiency.
do {
int64 right_size = remaining_size / 2;
int64 left_size = remaining_size - right_size;

@ -324,7 +324,7 @@ class AdjustContrastOpv2<CPUDevice, float> : public AdjustContrastOpV2Base {
// Similar to the reduction case, a straightforward implementation of this
// does not utilize vectorization well because of the small channel size.
// This algorithm repeatedly increases the area to be copied, and leads to
// much better vectorinizations in the copy.
// much better vectorizations in the copy.
for (int64 i = 0; i < batch; i++) {
// Copy over the inputs into outputs in this batch. Effectively:
// outputs(i, :, k) = inputs(i, k). An example of how this algorithm

@ -354,7 +354,7 @@ class AdjustContrastOpv2<CPUDevice, float> : public AdjustContrastOpV2Base {
int64 copied = 1;
while (copied < image_size) {
// Repeatedly increases the number of elements to copy so they have
// better vectorinizations. However, the source of the copy has to be
// better vectorizations. However, the source of the copy has to be
// not too large to stay in the cache.
const int64 kMaxToCopy = 1024;
int64 to_copy = std::min({copied, image_size - copied, kMaxToCopy});

@ -409,7 +409,7 @@ template <typename TaskType>
void AdaptiveSharedBatchScheduler<TaskType>::MaybeScheduleNextBatch() {
if (batches_.empty() || in_flight_batches_ >= in_flight_batches_limit_)
return;
// Non-integer limit handled probabilistially.
// Non-integer limit handled probabilistically.
if (in_flight_batches_limit_ - in_flight_batches_ < 1 &&
rand_double_(rand_engine_) >
in_flight_batches_limit_ - in_flight_batches_) {

@ -234,7 +234,7 @@ class Queue {
using SchedulableBatchCallback = std::function<void()>;
Queue(const typename SharedBatchScheduler<TaskType>::QueueOptions& options,
Env* env, ProcessBatchCallback process_batch_callback,
SchedulableBatchCallback schdulable_batch_callback);
SchedulableBatchCallback schedulable_batch_callback);

// Illegal to destruct unless the queue is empty.
~Queue();

@ -104,7 +104,7 @@ class BoostedTreesCreateQuantileStreamResourceOp : public OpKernel {
void Compute(OpKernelContext* context) override {
// Only create one, if one does not exist already. Report status for all
// other exceptions. If one already exists, it unrefs the new one.
// An epsilon value of zero could cause perfoamance issues and is therefore,
// An epsilon value of zero could cause performance issues and is therefore,
// disallowed.
const Tensor* epsilon_t;
OP_REQUIRES_OK(context, context->input(kEpsilonName, &epsilon_t));

@ -58,7 +58,7 @@ namespace quantiles {
// Compute: O(n * log(1/eps * log(eps * n))).
// Memory: O(1/eps * log^2(eps * n)) <- for one worker streaming through the
// entire dataset.
// An epsilon value of zero would make the algorithm extremely inefficent and
// An epsilon value of zero would make the algorithm extremely inefficient and
// therefore, is disallowed.
template <typename ValueType, typename WeightType,
typename CompareFn = std::less<ValueType>>

@ -72,7 +72,7 @@ class WeightedQuantilesStream {
explicit WeightedQuantilesStream(double eps, int64 max_elements)
: eps_(eps), buffer_(1LL, 2LL), finalized_(false) {
// See the class documentation. An epsilon value of zero could cause
// perfoamance issues.
// performance issues.
QCHECK(eps > 0) << "An epsilon value of zero is not allowed.";
std::tie(max_levels_, block_size_) = GetQuantileSpecs(eps, max_elements);
buffer_ = Buffer(block_size_, max_elements);

@ -340,7 +340,7 @@ boosted_trees::Node* BoostedTreesEnsembleResource::AddLeafNodes(
node->mutable_leaf());
}
node->mutable_metadata()->set_gain(candidate.gain);
// TODO(npononareva): this is LAYER-BY-LAYER boosting; add WHOLE-TREE.
// TODO(nponomareva): this is LAYER-BY-LAYER boosting; add WHOLE-TREE.
if (logits_dimension == 1) {
const float prev_logit_value = node->metadata().original_leaf().scalar();
left_node->mutable_leaf()->set_scalar(prev_logit_value +

@ -55,7 +55,7 @@ __global__ void CheckNumericsKernel(const T* __restrict__ data, int size,
}

// V2 of CheckNumericsKernel for GPU.
// Unlike CheckNumericsKernel (V1), this kernel disinguishes -Inf and +Inf.
// Unlike CheckNumericsKernel (V1), this kernel distinguishes -Inf and +Inf.
// The 3 elements of `abnormal_detected` are used to signify NaN, -Inf and +Inf,
// respectively.
template <typename T>

@ -68,7 +68,7 @@ void NcclBroadcaster::Run(StatusCallback done) {
{
// When all devices at this worker have called `SignalMultiNodeReady`, the
// `NcclManager` will enqueue the NCCL kernel on the NCCL stream. Thus the
// implementation of `UnblockDepdendencies` keeps track of the number of
// implementation of `UnblockDependencies` keeps track of the number of
// devices that have launched.
profiler::TraceMe activity("Schedule", profiler::TraceMeLevel::kInfo);
col_ctx_->col_exec->UnblockDependencies(*col_params_);

@ -299,7 +299,7 @@ inline int64 ConvolveScratchSize() {
return convolve_scratch_size;
}

// Finds the best convolutiun algorithm for the given ConvLaunch (cuda
// Finds the best convolution algorithm for the given ConvLaunch (cuda
// convolution on the stream) and parameters, by running all possible
// algorithms and measuring execution time.
// TODO(ezhulenev): Move it to conv_ops_gpu.h and share with conv_ops.cc.

@ -286,7 +286,7 @@ class GpuSparse {
//
// **NOTE** Matrices B and C are expected to be in column-major
// order; to make them consistent with TensorFlow they
// must be transposed (or the matmul op's pre/post-procesisng must take this
// must be transposed (or the matmul op's pre/post-processing must take this
// into account).
//
// **NOTE** This is an in-place operation for data in C.

@ -47,7 +47,7 @@ REGISTER_KERNEL_BUILDER(Name("AddV2")
#if TENSORFLOW_USE_SYCL
#define REGISTER_KERNEL(type) \
REGISTER(BinaryOp, SYCL, "Add", functor::add, type); \
REEGISTER(BinaryOp, SYCL, "AddV2", functor::add, type);
REGISTER(BinaryOp, SYCL, "AddV2", functor::add, type);

TF_CALL_SYCL_NUMBER_TYPES(REGISTER_KERNEL);

@ -348,8 +348,8 @@ struct SelectScalarHandler {
}
};

// Specilization for CPU device. Forward input to output depending on the `cond`
// value.
// Specialization for CPU device. Forward input to output depending on the
// `cond` value.
// TODO(sjhwang): Consider specializing for GPUDevice as well by using
// GPUDevice::memcpyDeviceToHost() to fetch bool value.
template <typename T>

@ -49,7 +49,7 @@ struct SimpleBinaryFunctor<GPUDevice, Functor> {
};

// Macros to explicitly instantiate kernels on GPU for multiple types
// (T0, T1, etc.) for SimpleBiaryFunctor (e.g., functor::tanh_grad).
// (T0, T1, etc.) for SimpleBinaryFunctor (e.g., functor::tanh_grad).
#define DEFINE_SIMPLE_BINARY1(F, T) \
template struct SimpleBinaryFunctor<GPUDevice, F<T> >
#define DEFINE_SIMPLE_BINARY2(F, T0, T1) \

@ -910,7 +910,7 @@ Status CapturedFunction::IsMultiDevice(IteratorContext* ctx,
const FunctionDef* fdef;
TF_RETURN_IF_ERROR(LookupFunction(*metadata_->lib_def(), name, &fdef));
for (const auto& node : fdef->node_def()) {
// Check if the op has a kernel availabe for the current device.
// Check if the op has a kernel available for the current device.
if (!KernelDefAvailable(current_device_type, node)) {
*is_multi_device = true;
return Status::OK();

Some files were not shown because too many files have changed in this diff.