STT-tensorflow/tensorflow/compiler/xla/array3d.h
Bixia Zheng 8a31fec675 [XLA] FP16 Dot support for the CPU and GPU backends.
Extend the stream interface ThenBlasGemmWithAlgorithm to support F16 matrix
multiplication with computation type FP32.

Extend the stream executor interface DoBlasGemmWithAlgorithm to support F16
GEMM with computation type FP32.

Extend the CPU IR emitter to handle F16 Dot instruction, and add F16 matrix
multiplication implementation to the CPU runtime.

Extend the GPU backend to handle FP16 GEMM Thunk.

Replicate the existing matrix multiplication test cases in
matrix_ops_simple_test and dot_operation_test for FP16.

RELNOTES:
PiperOrigin-RevId: 187369731
2018-02-28 12:59:55 -08:00

81 lines
3.0 KiB
C++

/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_COMPILER_XLA_ARRAY3D_H_
#define TENSORFLOW_COMPILER_XLA_ARRAY3D_H_
#include <algorithm>
#include <functional>
#include <initializer_list>
#include <iterator>
#include <memory>
#include <numeric>
#include <random>
#include "tensorflow/compiler/xla/array.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/platform/types.h"
namespace xla {
// A rank-3 convenience wrapper over Array<T>. It contributes no storage of its
// own: every constructor forwards to an Array<T> constructor, and the only
// extra members are the n1()/n2()/n3() extent accessors.
template <typename T>
class Array3D : public Array<T> {
 public:
  // Default construction forwards the dimension list {0, 0, 0} to Array<T>.
  Array3D() : Array<T>(std::vector<int64>{0, 0, 0}) {}

  // Builds an n1 x n2 x n3 array without supplying a fill value. Element
  // contents are whatever Array<T>'s dimensions-only constructor leaves them
  // as, so callers should not depend on them.
  Array3D(const int64 n1, const int64 n2, const int64 n3)
      : Array<T>(std::vector<int64>{n1, n2, n3}) {}

  // Builds an n1 x n2 x n3 array, forwarding `value` to Array<T> as the fill
  // value for the elements.
  Array3D(const int64 n1, const int64 n2, const int64 n3, const T value)
      : Array<T>(std::vector<int64>{n1, n2, n3}, value) {}

  // Builds an array from a three-level nested initializer list. The outermost
  // list enumerates the first dimension, the middle list the second, and the
  // innermost list the third.
  //
  // For example {{{1, 2}, {3, 4}, {5, 6}, {7, 8}},
  //              {{9, 10}, {11, 12}, {13, 14}, {15, 16}},
  //              {{17, 18}, {19, 20}, {21, 22}, {23, 24}}}
  // results in an array with n1=3, n2=4, n3=2.
  Array3D(std::initializer_list<std::initializer_list<std::initializer_list<T>>>
              values)
      : Array<T>(values) {}

  // Builds an array whose element type is a floating-point type (half,
  // bfloat16, float, or double) from a nested initializer list of float
  // values. The overload only participates when the list's element type is
  // exactly float and T is one of those four types.
  template <typename FloatT,
            typename = typename std::enable_if<
                std::is_same<FloatT, float>::value &&
                (std::is_same<T, Eigen::half>::value ||
                 std::is_same<T, bfloat16>::value ||
                 std::is_same<T, float>::value ||
                 std::is_same<T, double>::value)>::type>
  Array3D(std::initializer_list<
          std::initializer_list<std::initializer_list<FloatT>>>
              values)
      : Array<T>(values) {}

  // Extent of the first dimension (dimension index 0).
  int64 n1() const {
    return this->dim(0);
  }

  // Extent of the second dimension (dimension index 1).
  int64 n2() const {
    return this->dim(1);
  }

  // Extent of the third dimension (dimension index 2).
  int64 n3() const {
    return this->dim(2);
  }
};
} // namespace xla
#endif // TENSORFLOW_COMPILER_XLA_ARRAY3D_H_