STT-tensorflow/tensorflow/lite/delegates/gpu/api.h

/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_DELEGATES_GPU_API_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_API_H_

// Usage example:
//
//   // Builder is created from a model using GPU-specific parameters.
//   std::unique_ptr<InferenceBuilder> builder = ...;
//
//   // input data is coming from a texture
//   // output data goes to CPU
//   builder->SetInputObjectDef(0, {DataType::FLOAT16, DataLayout::DHWC4,
//                                  ObjectType::OPENGL_TEXTURE, true});
//   builder->SetOutputObjectDef(0, {DataType::FLOAT32, DataLayout::BHWC,
//                                  ObjectType::CPU_MEMORY, false});
//   std::unique_ptr<InferenceRunner> runner;
//   RETURN_IF_ERROR(builder->Build(&runner));  // may take significant time.
//   RETURN_IF_ERROR(
//       runner->SetInputObject(0, OpenGlTexture{texture_id, texture_format}));
//   RETURN_IF_ERROR(runner->Run());

#include <cstdint>
#include <memory>
#include <vector>

#include "absl/types/span.h"
#include "absl/types/variant.h"
#include "third_party/opencl_headers/CL/cl.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/util.h"
#include "tensorflow/lite/delegates/gpu/gl/portable_gl31.h"

namespace tflite {
namespace gpu {

// Abbreviations used in the layout names below:
//   B  - batch
//   H  - height
//   W  - width
//   C  - channels
//   D  - depth, defined as IntegralDivideRoundUp(C, 4)
//   C4 - the constant 4
//
// Enumerator values are spelled out explicitly; they are the same values the
// implicit numbering by declaration order would produce.
enum class DataLayout {
  // Default value of ObjectDef::data_layout.
  UNKNOWN = 0,

  BHWC = 1,

  DHWC4 = 2,

  HWDC4 = 3,

  HDWC4 = 4,
};

// Kind of object that backs a tensor. Enumerator values are spelled out
// explicitly; they are the same values the implicit numbering by declaration
// order would produce.
enum class ObjectType {
  // Default value of ObjectDef::object_type.
  UNKNOWN = 0,
  OPENGL_SSBO = 1,
  OPENGL_TEXTURE = 2,
  CPU_MEMORY = 3,
  OPENCL_TEXTURE = 4,
  OPENCL_BUFFER = 5,
};

// Handle to an OpenGL buffer object, one of the TensorObject alternatives.
struct OpenGlBuffer {
  // Buffer name; defaults to GL_INVALID_INDEX until a real id is assigned.
  GLuint id{GL_INVALID_INDEX};
};

// Handle to an OpenGL texture, one of the TensorObject alternatives.
struct OpenGlTexture {
  // Texture name; defaults to GL_INVALID_INDEX until a real id is assigned.
  GLuint id{GL_INVALID_INDEX};
  // Texture format; defaults to GL_INVALID_ENUM until a real format is set.
  GLenum format{GL_INVALID_ENUM};
};

// Handle to an OpenCL buffer, one of the TensorObject alternatives.
struct OpenClBuffer {
  // Initialized to nullptr so that a default-constructed OpenClBuffer never
  // carries an indeterminate handle, matching the defaulted members of the
  // OpenGL and CPU handle structs in this header.
  cl_mem memobj = nullptr;
};

// Handle to an OpenCL texture, one of the TensorObject alternatives.
struct OpenClTexture {
  // Initialized to nullptr so that a default-constructed OpenClTexture never
  // carries an indeterminate handle, matching the defaulted members of the
  // OpenGL and CPU handle structs in this header.
  cl_mem memobj = nullptr;
  // TODO(akulik): should it specify texture format?
};

// Non-owning description of a CPU memory region, one of the TensorObject
// alternatives. The struct has no destructor, so it never releases `data`.
struct CpuMemory {
  // Start of the region; nullptr by default.
  void* data{nullptr};
  // Length of the region in bytes; 0 by default.
  size_t size_bytes{0};
};

// Wraps a span of T into a CpuMemory that points at the span's first element
// and covers t.size() * sizeof(T) bytes. No data is copied.
template <typename T>
inline CpuMemory MakeCpuMemory(absl::Span<T> t) {
  CpuMemory memory;
  memory.size_bytes = t.size() * sizeof(T);
  memory.data = t.data();
  return memory;
}

// Wraps a span of const T into a CpuMemory that points at the span's first
// element and covers t.size() * sizeof(T) bytes. No data is copied.
//
// CpuMemory::data is a non-const void*, so constness is cast away here.
// NOTE(review): the name suggests the result is meant for reading only —
// confirm that no consumer writes through it.
template <typename T>
inline CpuMemory MakeReadableCpuMemory(absl::Span<const T> t) {
  CpuMemory memory;
  memory.size_bytes = t.size() * sizeof(T);
  memory.data = const_cast<T*>(t.data());
  return memory;
}

// Describes how an object is represented: element type, memory layout, kind
// of backing object, and who manages it.
struct ObjectDef {
  DataType data_type{DataType::UNKNOWN};
  DataLayout data_layout{DataLayout::UNKNOWN};
  ObjectType object_type{ObjectType::UNKNOWN};

  // When true, the object is managed externally and has to be handed to
  // InferenceRunner by the user before inference is run.
  //
  // User-provided objects will not be re-used internally for any purpose to
  // lower overall memory usage.
  bool user_provided{false};

  // Two definitions are equal when all four fields are equal.
  bool operator==(const ObjectDef& other) const {
    if (data_type != other.data_type) return false;
    if (data_layout != other.data_layout) return false;
    if (object_type != other.object_type) return false;
    return user_provided == other.user_provided;
  }
};

// @return true if the given object definition is valid.
// NOTE(review): the exact validity criteria live in the implementation, which
// is not part of this header — confirm there.
bool IsValid(const ObjectDef& def);

// Four-component tensor shape: batch, height, width and channels.
struct Dimensions {
  // Default shape has every component equal to 1.
  Dimensions() : Dimensions(1, 1, 1, 1) {}

  Dimensions(int32_t batch, int32_t height, int32_t width, int32_t channels)
      : b(batch), h(height), w(width), c(channels) {}

  // Depth: the channel count divided by 4, via IntegralDivideRoundUp.
  int32_t d() const { return IntegralDivideRoundUp(c, 4); }

  // Product of all four components. The multiplication is plain int32_t
  // arithmetic with no overflow check.
  int32_t product() const {
    int32_t total = b;
    total *= h;
    total *= w;
    total *= c;
    return total;
  }

  // Shapes are equal when all four components are equal.
  bool operator==(const Dimensions& other) const {
    if (b != other.b) return false;
    if (h != other.h) return false;
    if (w != other.w) return false;
    return c == other.c;
  }

  int32_t b;  // batch
  int32_t h;  // height
  int32_t w;  // width
  int32_t c;  // channels
};

// Pairs a tensor shape with the definition of the object that holds it.
struct TensorObjectDef {
  // The meaning of the dimensions is defined by the corresponding DataLayout.
  Dimensions dimensions;
  ObjectDef object_def;

  // Equal when both the shape and the object definition are equal.
  bool operator==(const TensorObjectDef& other) const {
    if (!(dimensions == other.dimensions)) return false;
    return object_def == other.object_def;
  }
};

// @return true if tensor object def is defined.
bool IsValid(const TensorObjectDef& def);

// A tensor object: holds exactly one of the handle structs declared in this
// header, or absl::monostate. Because absl::monostate is the first
// alternative, a default-constructed TensorObject holds it.
// NOTE(review): monostate presumably stands for "no object set" — confirm
// against the implementation of IsValid/GetType.
using TensorObject = absl::variant<absl::monostate, OpenGlBuffer, OpenGlTexture,
                                   CpuMemory, OpenClBuffer, OpenClTexture>;

// @return true if object is set and corresponding values are defined.
bool IsValid(const TensorObjectDef& def, const TensorObject& object);

// @return the ObjectType for the given object.
// NOTE(review): presumably derived from the alternative currently held by the
// variant; the mapping is defined in the implementation, not in this header.
ObjectType GetType(const TensorObject& object);

// @return true if corresponding object is set for the given type
bool IsObjectPresent(ObjectType type, const TensorObject& obj);

// Forward declaration; InferenceBuilder::Build refers to InferenceRunner
// before the class is defined.
class InferenceRunner;

// Allows to inspect and change input and output definitions before a graph is
// prepared for the inference.
class InferenceBuilder {
 public:
  virtual ~InferenceBuilder() = default;

  // Returns inference graph inputs and outputs definitions.
  virtual std::vector<TensorObjectDef> inputs() const = 0;
  virtual std::vector<TensorObjectDef> outputs() const = 0;

  // Sets new shape for the input if underlying implementation and graph
  // structure allows dynamic tensors.
  virtual Status SetInputShape(int index, const Dimensions& dimensions) = 0;

  // Updates object definitions for the given index. Implementation may allow
  // to use different layouts and/or data type conversions between objects
  // defined in a graph and given objects, for example:
  //   input '0' is DataType::FLOAT32, DataLayout::BHWC.
  //   A user, however, has an input in DataType::FLOAT16, DataLayout::DHWC4.
  //   An implementation may allow this transformation to happen automatically
  //   under the hood.
  virtual Status SetInputObjectDef(int index, ObjectDef def) = 0;
  virtual Status SetOutputObjectDef(int index, ObjectDef def) = 0;

  // Applies `def` to every input, in index order, by calling
  // SetInputObjectDef. Stops at and returns the first error; returns
  // OkStatus() when every call succeeds.
  virtual Status SetAllInputObjectDefsTo(ObjectDef def) {
    // inputs() is queried once. The count is converted to int up front so the
    // loop compares like-signed values and matches the int index parameter.
    const int num_inputs = static_cast<int>(inputs().size());
    for (int i = 0; i < num_inputs; ++i) {
      RETURN_IF_ERROR(SetInputObjectDef(i, def));
    }
    return OkStatus();
  }

  // Applies `def` to every output, in index order, by calling
  // SetOutputObjectDef. Stops at and returns the first error; returns
  // OkStatus() when every call succeeds.
  virtual Status SetAllOutputObjectDefsTo(ObjectDef def) {
    // outputs() is queried once. The count is converted to int up front so
    // the loop compares like-signed values and matches the int index
    // parameter.
    const int num_outputs = static_cast<int>(outputs().size());
    for (int i = 0; i < num_outputs; ++i) {
      RETURN_IF_ERROR(SetOutputObjectDef(i, def));
    }
    return OkStatus();
  }

  // Creates new instance of the inference runner. InferenceBuilder stays valid
  // and could be used to create another inference runner if needed.
  //
  // This method may take significant time to prepare new inference runner. For
  // example, it may require to compile OpenGL shaders.
  virtual Status Build(std::unique_ptr<InferenceRunner>* runner) = 0;
};

// Executes a prepared inference. Every object marked as external must be set
// prior to calling the Run method.
class InferenceRunner {
 public:
  virtual ~InferenceRunner() = default;

  // Definitions of the inference graph inputs and outputs.
  virtual std::vector<TensorObjectDef> inputs() const = 0;
  virtual std::vector<TensorObjectDef> outputs() const = 0;

  // Getters provide access to the underlying object for the given index.
  // Setters set or replace the external object for the given index. Note that
  // the object needs to match the object definition set earlier in
  // InferenceBuilder.

  virtual Status GetInputObject(int index, TensorObject* object) = 0;
  virtual Status GetOutputObject(int index, TensorObject* object) = 0;
  virtual Status SetInputObject(int index, TensorObject object) = 0;
  virtual Status SetOutputObject(int index, TensorObject object) = 0;

  // Runs the inference.
  virtual Status Run() = 0;
};

}  // namespace gpu
}  // namespace tflite

#endif  // TENSORFLOW_LITE_DELEGATES_GPU_API_H_