187 lines
6.3 KiB
Python
187 lines
6.3 KiB
Python
# Lint as: python2, python3
|
|
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# ==============================================================================
|
|
"""Library for getting system information during TensorFlow tests."""
|
|
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
|
|
import ctypes as ct
|
|
import platform
|
|
|
|
import six
|
|
from six.moves import range
|
|
|
|
from tensorflow.core.util import test_log_pb2
|
|
from tensorflow.python.framework import errors
|
|
from tensorflow.python.platform import gfile
|
|
|
|
|
|
def _gather_gpu_devices_proc():
|
|
"""Try to gather NVidia GPU device information via /proc/driver."""
|
|
dev_info = []
|
|
for f in gfile.Glob("/proc/driver/nvidia/gpus/*/information"):
|
|
bus_id = six.ensure_str(f).split("/")[5]
|
|
key_values = dict(
|
|
six.ensure_str(line.rstrip()).replace("\t", "").split(":", 1)
|
|
for line in gfile.GFile(f, "r"))
|
|
key_values = dict((k.lower(), six.ensure_str(v).strip(" ").rstrip(" "))
|
|
for (k, v) in key_values.items())
|
|
info = test_log_pb2.GPUInfo()
|
|
info.model = key_values.get("model", "Unknown")
|
|
info.uuid = key_values.get("gpu uuid", "Unknown")
|
|
info.bus_id = bus_id
|
|
dev_info.append(info)
|
|
return dev_info
|
|
|
|
|
|
class CUDADeviceProperties(ct.Structure):
|
|
# See $CUDA_HOME/include/cuda_runtime_api.h for the definition of
|
|
# the cudaDeviceProp struct.
|
|
_fields_ = [
|
|
("name", ct.c_char * 256),
|
|
("totalGlobalMem", ct.c_size_t),
|
|
("sharedMemPerBlock", ct.c_size_t),
|
|
("regsPerBlock", ct.c_int),
|
|
("warpSize", ct.c_int),
|
|
("memPitch", ct.c_size_t),
|
|
("maxThreadsPerBlock", ct.c_int),
|
|
("maxThreadsDim", ct.c_int * 3),
|
|
("maxGridSize", ct.c_int * 3),
|
|
("clockRate", ct.c_int),
|
|
("totalConstMem", ct.c_size_t),
|
|
("major", ct.c_int),
|
|
("minor", ct.c_int),
|
|
("textureAlignment", ct.c_size_t),
|
|
("texturePitchAlignment", ct.c_size_t),
|
|
("deviceOverlap", ct.c_int),
|
|
("multiProcessorCount", ct.c_int),
|
|
("kernelExecTimeoutEnabled", ct.c_int),
|
|
("integrated", ct.c_int),
|
|
("canMapHostMemory", ct.c_int),
|
|
("computeMode", ct.c_int),
|
|
("maxTexture1D", ct.c_int),
|
|
("maxTexture1DMipmap", ct.c_int),
|
|
("maxTexture1DLinear", ct.c_int),
|
|
("maxTexture2D", ct.c_int * 2),
|
|
("maxTexture2DMipmap", ct.c_int * 2),
|
|
("maxTexture2DLinear", ct.c_int * 3),
|
|
("maxTexture2DGather", ct.c_int * 2),
|
|
("maxTexture3D", ct.c_int * 3),
|
|
("maxTexture3DAlt", ct.c_int * 3),
|
|
("maxTextureCubemap", ct.c_int),
|
|
("maxTexture1DLayered", ct.c_int * 2),
|
|
("maxTexture2DLayered", ct.c_int * 3),
|
|
("maxTextureCubemapLayered", ct.c_int * 2),
|
|
("maxSurface1D", ct.c_int),
|
|
("maxSurface2D", ct.c_int * 2),
|
|
("maxSurface3D", ct.c_int * 3),
|
|
("maxSurface1DLayered", ct.c_int * 2),
|
|
("maxSurface2DLayered", ct.c_int * 3),
|
|
("maxSurfaceCubemap", ct.c_int),
|
|
("maxSurfaceCubemapLayered", ct.c_int * 2),
|
|
("surfaceAlignment", ct.c_size_t),
|
|
("concurrentKernels", ct.c_int),
|
|
("ECCEnabled", ct.c_int),
|
|
("pciBusID", ct.c_int),
|
|
("pciDeviceID", ct.c_int),
|
|
("pciDomainID", ct.c_int),
|
|
("tccDriver", ct.c_int),
|
|
("asyncEngineCount", ct.c_int),
|
|
("unifiedAddressing", ct.c_int),
|
|
("memoryClockRate", ct.c_int),
|
|
("memoryBusWidth", ct.c_int),
|
|
("l2CacheSize", ct.c_int),
|
|
("maxThreadsPerMultiProcessor", ct.c_int),
|
|
("streamPrioritiesSupported", ct.c_int),
|
|
("globalL1CacheSupported", ct.c_int),
|
|
("localL1CacheSupported", ct.c_int),
|
|
("sharedMemPerMultiprocessor", ct.c_size_t),
|
|
("regsPerMultiprocessor", ct.c_int),
|
|
("managedMemSupported", ct.c_int),
|
|
("isMultiGpuBoard", ct.c_int),
|
|
("multiGpuBoardGroupID", ct.c_int),
|
|
# Pad with extra space to avoid dereference crashes if future
|
|
# versions of CUDA extend the size of this struct.
|
|
("__future_buffer", ct.c_char * 4096)
|
|
]
|
|
|
|
|
|
def _gather_gpu_devices_cudart():
|
|
"""Try to gather NVidia GPU device information via libcudart."""
|
|
dev_info = []
|
|
|
|
system = platform.system()
|
|
if system == "Linux":
|
|
libcudart = ct.cdll.LoadLibrary("libcudart.so")
|
|
elif system == "Darwin":
|
|
libcudart = ct.cdll.LoadLibrary("libcudart.dylib")
|
|
elif system == "Windows":
|
|
libcudart = ct.windll.LoadLibrary("libcudart.dll")
|
|
else:
|
|
raise NotImplementedError("Cannot identify system.")
|
|
|
|
version = ct.c_int()
|
|
rc = libcudart.cudaRuntimeGetVersion(ct.byref(version))
|
|
if rc != 0:
|
|
raise ValueError("Could not get version")
|
|
if version.value < 6050:
|
|
raise NotImplementedError("CUDA version must be between >= 6.5")
|
|
|
|
device_count = ct.c_int()
|
|
libcudart.cudaGetDeviceCount(ct.byref(device_count))
|
|
|
|
for i in range(device_count.value):
|
|
properties = CUDADeviceProperties()
|
|
rc = libcudart.cudaGetDeviceProperties(ct.byref(properties), i)
|
|
if rc != 0:
|
|
raise ValueError("Could not get device properties")
|
|
pci_bus_id = " " * 13
|
|
rc = libcudart.cudaDeviceGetPCIBusId(ct.c_char_p(pci_bus_id), 13, i)
|
|
if rc != 0:
|
|
raise ValueError("Could not get device PCI bus id")
|
|
|
|
info = test_log_pb2.GPUInfo() # No UUID available
|
|
info.model = properties.name
|
|
info.bus_id = pci_bus_id
|
|
dev_info.append(info)
|
|
|
|
del properties
|
|
|
|
return dev_info
|
|
|
|
|
|
def gather_gpu_devices():
|
|
"""Gather gpu device info.
|
|
|
|
Returns:
|
|
A list of test_log_pb2.GPUInfo messages.
|
|
"""
|
|
try:
|
|
# Prefer using /proc if possible, it provides the UUID.
|
|
dev_info = _gather_gpu_devices_proc()
|
|
if not dev_info:
|
|
raise ValueError("No devices found")
|
|
return dev_info
|
|
except (IOError, ValueError, errors.OpError):
|
|
pass
|
|
|
|
try:
|
|
# Fall back on using libcudart
|
|
return _gather_gpu_devices_cudart()
|
|
except (OSError, ValueError, NotImplementedError, errors.OpError):
|
|
return []
|