Compilation fixes for MSVC compiler.

PiperOrigin-RevId: 316217725
Change-Id: I595eee325c6bd2ab253e710617c6a0cbaccb6aba
This commit is contained in:
parent
7cf2c773e4
commit
c45a3444af
tensorflow/lite/delegates/gpu/cl
@@ -66,12 +66,7 @@ std::string GetSpaceToDepthCode(
|
||||
c += " tmp[i] = t_ar[src_c % 4];\n";
|
||||
c += " }\n";
|
||||
c += " FLT4 result = (FLT4)(tmp[0], tmp[1], tmp[2], tmp[3]);\n";
|
||||
const LinkingContext context = {
|
||||
.var_name = "result",
|
||||
.x_coord = "X",
|
||||
.y_coord = "Y",
|
||||
.s_coord = "Z",
|
||||
};
|
||||
const LinkingContext context{"result", "X", "Y", "Z"};
|
||||
c += PostProcess(linked_operations, context);
|
||||
c += " " + dst_tensor.WriteWHS("result", "X", "Y", "Z");
|
||||
c += "}\n";
|
||||
|
@@ -14,6 +14,7 @@ limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <chrono> // NOLINT(build/c++11)
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
@@ -64,14 +65,13 @@ absl::Status RunModelSample(const std::string& model_name) {
|
||||
|
||||
const int kNumRuns = 10;
|
||||
for (int i = 0; i < kNumRuns; ++i) {
|
||||
const auto start = absl::Now();
|
||||
const auto start = std::chrono::high_resolution_clock::now();
|
||||
for (int k = 0; k < num_runs_per_sec; ++k) {
|
||||
RETURN_IF_ERROR(context.AddToQueue(env.queue()));
|
||||
}
|
||||
RETURN_IF_ERROR(env.queue()->WaitForCompletion());
|
||||
const auto end = absl::Now();
|
||||
const double total_time_ms =
|
||||
static_cast<double>((end - start) / absl::Nanoseconds(1)) * 1e-6;
|
||||
const auto end = std::chrono::high_resolution_clock::now();
|
||||
const double total_time_ms = (end - start).count() * 1e-6f;
|
||||
const double average_inference_time = total_time_ms / num_runs_per_sec;
|
||||
std::cout << "Total time - " << average_inference_time << "ms" << std::endl;
|
||||
}
|
||||
|
@@ -36,7 +36,8 @@ int ChannelTypeToSizeInBytes(cl_channel_type type);
|
||||
bool OpenCLSupported();
|
||||
|
||||
template <DataType S, typename T>
|
||||
void CopyLinearFLT4(const Tensor<Linear, S>& src, absl::Span<T> dst) {
|
||||
void CopyLinearFLT4(const tflite::gpu::Tensor<Linear, S>& src,
|
||||
absl::Span<T> dst) {
|
||||
const int dst_depth = dst.size();
|
||||
for (int d = 0; d < dst_depth; ++d) {
|
||||
T val;
|
||||
|
Loading…
Reference in New Issue
Block a user