From dedaa43945a2f90e5b6a815401f6991a64c7e947 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Fri, 27 May 2016 11:50:16 -0800
Subject: [PATCH] Add a new contrib/avro directory and the initial schema that resembles tf.train.Example. Change: 123445810

---
 avro.BUILD                           | 35 ++++++++++++++
 boost.BUILD                          | 58 +++++++++++++++++++++++
 bzip2.BUILD                          | 36 ++++++++++++++
 tensorflow/contrib/avro/BUILD        | 30 ++++++++++++
 tensorflow/contrib/avro/README.md    |  4 ++
 tensorflow/contrib/avro/__init__.py  |  0
 tensorflow/contrib/avro/example.json | 71 ++++++++++++++++++++++++++++
 tensorflow/workspace.bzl             | 29 ++++++++++++
 third_party/avro/BUILD               |  4 ++
 third_party/avro/build_defs.bzl      | 28 +++++++++++
 zlib.BUILD                           | 12 +++++
 11 files changed, 307 insertions(+)
 create mode 100644 avro.BUILD
 create mode 100644 boost.BUILD
 create mode 100644 bzip2.BUILD
 create mode 100644 tensorflow/contrib/avro/BUILD
 create mode 100644 tensorflow/contrib/avro/README.md
 create mode 100644 tensorflow/contrib/avro/__init__.py
 create mode 100644 tensorflow/contrib/avro/example.json
 create mode 100644 third_party/avro/BUILD
 create mode 100644 third_party/avro/build_defs.bzl
 create mode 100644 zlib.BUILD

diff --git a/avro.BUILD b/avro.BUILD
new file mode 100644
index 00000000000..5e73c1a6783
--- /dev/null
+++ b/avro.BUILD
@@ -0,0 +1,35 @@
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])  # Apache 2.0
+
+prefix_dir = "avro-cpp-1.8.0"
+
+cc_library(
+    name = "avrocpp",
+    srcs = glob(
+        [
+            prefix_dir + "/impl/**/*.cc",
+            prefix_dir + "/impl/**/*.hh",
+        ],
+        exclude = [
+            prefix_dir + "/impl/avrogencpp.cc",
+        ],
+    ),
+    hdrs = glob([prefix_dir + "/api/**/*.hh"]),
+    includes = [prefix_dir + "/api"],
+    deps = [
+        "@boost_archive//:boost",
+        "@boost_archive//:filesystem",
+        "@boost_archive//:iostreams",
+        "@boost_archive//:system",
+    ],
+)
+
+cc_binary(
+    name = "avrogencpp",
+    srcs = [prefix_dir + "/impl/avrogencpp.cc"],
+    deps = [
+        ":avrocpp",
+        "@boost_archive//:program_options",
+    ],
+)
diff --git a/boost.BUILD b/boost.BUILD
new file mode 100644
index 00000000000..da47abdb6c0
--- /dev/null
+++ b/boost.BUILD
@@ -0,0 +1,58 @@
+# Description:
+#   The Boost library collection (http://www.boost.org)
+#
+# Most Boost libraries are header-only, in which case you only need to depend
+# on :boost. If you need one of the libraries that has a separately-compiled
+# implementation, depend on the appropriate libs rule.
+
+# This is only needed for Avro.
+package(default_visibility = ["@avro_archive//:__subpackages__"])
+
+licenses(["notice"])  # Boost software license
+
+prefix_dir = "boost_1_61_0"
+
+cc_library(
+    name = "boost",
+    hdrs = glob([
+        prefix_dir + "/boost/**/*.hpp",
+        prefix_dir + "/boost/**/*.h",
+        prefix_dir + "/boost/**/*.ipp",
+    ]),
+    includes = [prefix_dir],
+)
+
+cc_library(
+    name = "filesystem",
+    srcs = glob([prefix_dir + "/libs/filesystem/src/*.cpp"]),
+    deps = [
+        ":boost",
+        ":system",
+    ],
+)
+
+cc_library(
+    name = "iostreams",
+    srcs = glob([prefix_dir + "/libs/iostreams/src/*.cpp"]),
+    deps = [
+        ":boost",
+        "@bzip2_archive//:bz2lib",
+        "@zlib_archive//:zlib",
+    ],
+)
+
+cc_library(
+    name = "program_options",
+    srcs = glob([prefix_dir + "/libs/program_options/src/*.cpp"]),
+    deps = [
+        ":boost",
+    ],
+)
+
+cc_library(
+    name = "system",
+    srcs = glob([prefix_dir + "/libs/system/src/*.cpp"]),
+    deps = [
+        ":boost",
+    ],
+)
diff --git a/bzip2.BUILD b/bzip2.BUILD
new file mode 100644
index 00000000000..42e16df6716
--- /dev/null
+++ b/bzip2.BUILD
@@ -0,0 +1,36 @@
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])  # BSD derivative
+
+prefix_dir = "bzip2-1.0.6"
+
+BZ2LIB_SRCS = [
+    # Listed in the same order as the corresponding .o files appear in OBJS in
+    # the upstream Makefile (rather than in lexicographic order), so that the
+    # two lists are easy to compare and confirm identical.
+    "blocksort.c",
+    "huffman.c",
+    "crctable.c",
+    "randtable.c",
+    "compress.c",
+    "decompress.c",
+    "bzlib.c",
+]
+
+cc_library(
+    name = "bz2lib",
+    srcs = [prefix_dir + "/" + source for source in BZ2LIB_SRCS] +
+           [prefix_dir + "/bzlib_private.h"],
+    hdrs = [prefix_dir + "/bzlib.h"],
+    includes = [prefix_dir],
+)
+
+cc_binary(
+    name = "bzip2",
+    srcs = [
+        prefix_dir + "/bzip2.c",  # under the versioned directory, like every other source in this file
+    ],
+    deps = [
+        ":bz2lib",
+    ],
+)
diff --git a/tensorflow/contrib/avro/BUILD b/tensorflow/contrib/avro/BUILD
new file mode 100644
index 00000000000..274861c9572
--- /dev/null
+++ b/tensorflow/contrib/avro/BUILD
@@ -0,0 +1,30 @@
+# Description:
+#   Contains ops for reading and writing Apache Avro files.
+#   (https://avro.apache.org/)
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+package(default_visibility = ["//tensorflow:__subpackages__"])
+
+load("//third_party/avro:build_defs.bzl", "avro_gen_cpp")
+
+avro_gen_cpp(
+    name = "example_h",
+    srcs = ["example.json"],
+    outs = ["example.h"],
+    namespace = "tensorflow",
+)
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
diff --git a/tensorflow/contrib/avro/README.md b/tensorflow/contrib/avro/README.md
new file mode 100644
index 00000000000..cd9fa95d2fd
--- /dev/null
+++ b/tensorflow/contrib/avro/README.md
@@ -0,0 +1,4 @@
+# TensorFlow Avro support
+
+This directory contains code for reading and writing
+[Apache Avro](https://avro.apache.org/) data in TensorFlow.
\ No newline at end of file
diff --git a/tensorflow/contrib/avro/__init__.py b/tensorflow/contrib/avro/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tensorflow/contrib/avro/example.json b/tensorflow/contrib/avro/example.json
new file mode 100644
index 00000000000..c0e7868f831
--- /dev/null
+++ b/tensorflow/contrib/avro/example.json
@@ -0,0 +1,71 @@
+{
+  "type": "record",
+  "name": "Example",
+  "fields": [
+    {
+      "name": "features",
+      "type": {
+        "type": "record",
+        "name": "Features",
+        "fields": [
+          {
+            "name": "feature",
+            "type": {
+              "type": "map",
+              "values": {
+                "type": "record",
+                "name": "Feature",
+                "fields": [
+                  {
+                    "name": "values",
+                    "type": [
+                      {
+                        "type": "record",
+                        "name": "BytesList",
+                        "fields": [
+                          {
+                            "name": "value",
+                            "type": {
+                              "type": "array",
+                              "items": "bytes"
+                            }
+                          }
+                        ]
+                      },
+                      {
+                        "type": "record",
+                        "name": "FloatList",
+                        "fields": [
+                          {
+                            "name": "value",
+                            "type": {
+                              "type": "array",
+                              "items": "float"
+                            }
+                          }
+                        ]
+                      },
+                      {
+                        "type": "record",
+                        "name": "Int64List",
+                        "fields": [
+                          {
+                            "name": "value",
+                            "type": {
+                              "type": "array",
+                              "items": "long"
+                            }
+                          }
+                        ]
+                      }
+                    ]
+                  }
+                ]
+              }
+            }
+          }
+        ]
+      }
+    }
+  ]
+}
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 5632f6850b7..8f19d80b0e5 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -145,3 +145,32 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     remote = "https://boringssl.googlesource.com/boringssl",
     build_file = path_prefix + "boringssl.BUILD",
   )
+
+  native.new_http_archive(
+    name = "avro_archive",
+    url = "http://www-us.apache.org/dist/avro/avro-1.8.0/cpp/avro-cpp-1.8.0.tar.gz",
+    sha256 = "ec6e2ec957e95ca07f70cc25f02f5c416f47cb27bd987a6ec770dcbe72527368",
+    build_file = path_prefix + "avro.BUILD",
+  )
+
+  native.new_http_archive(
+    name = "boost_archive",
+    url = "http://pilotfiber.dl.sourceforge.net/project/boost/boost/1.61.0/boost_1_61_0.tar.gz",
+    sha256 = "a77c7cc660ec02704c6884fbb20c552d52d60a18f26573c9cee0788bf00ed7e6",
+    build_file = path_prefix + "boost.BUILD",
+  )
+
+  native.new_http_archive(
+    name = "bzip2_archive",
+    url = "http://www.bzip.org/1.0.6/bzip2-1.0.6.tar.gz",
+    sha256 = "a2848f34fcd5d6cf47def00461fcb528a0484d8edef8208d6d2e2909dc61d9cd",
+    build_file = path_prefix + "bzip2.BUILD",
+  )
+
+  native.new_http_archive(
+    name = "zlib_archive",
+    url = "http://zlib.net/zlib-1.2.8.tar.gz",
+    sha256 = "36658cb768a54c1d4dec43c3116c27ed893e88b02ecfcb44f2166f9c0b7f2a0d",
+    build_file = path_prefix + "zlib.BUILD",
+  )
+
diff --git a/third_party/avro/BUILD b/third_party/avro/BUILD
new file mode 100644
index 00000000000..5d154c195cf
--- /dev/null
+++ b/third_party/avro/BUILD
@@ -0,0 +1,4 @@
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])  # Apache 2.0
+
diff --git a/third_party/avro/build_defs.bzl b/third_party/avro/build_defs.bzl
new file mode 100644
index 00000000000..91404098769
--- /dev/null
+++ b/third_party/avro/build_defs.bzl
@@ -0,0 +1,28 @@
+"""Build extension for generating C++ header file from an Avro schema.
+
+Example usage:
+
+load("//third_party/avro:build_defs.bzl", "avro_gen_cpp")
+
+avro_gen_cpp(
+    name = "myrule",
+    srcs = ["myschema.json"],
+    outs = ["myschema.h"],
+    namespace = "mynamespace",
+)
+"""
+
+def avro_gen_cpp(name, srcs, outs, namespace, visibility=None):
+  native.genrule(
+    name = name,
+    srcs = srcs,  # must resolve to exactly one schema file (see --input below)
+    outs = outs,  # must name exactly one header (see --output below)
+    cmd = ("$(location @avro_archive//:avrogencpp)" +
+           " --include-prefix external/avro_archive/avro-cpp-1.8.0/api" +
+           " --namespace " + namespace +
+           " --no-union-typedef" +
+           " --input $<" +  # "$<" is a build error unless srcs is one file; "$(SRCS)" would pass several paths
+           " --output $@"),  # "$@" is likewise a build error unless outs is one file
+    tools = ["@avro_archive//:avrogencpp"],
+    visibility = visibility,
+  )
diff --git a/zlib.BUILD b/zlib.BUILD
new file mode 100644
index 00000000000..9e0ce538788
--- /dev/null
+++ b/zlib.BUILD
@@ -0,0 +1,12 @@
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])  # BSD/MIT-like license (for zlib)
+
+prefix_dir = "zlib-1.2.8"
+
+cc_library(
+    name = "zlib",
+    srcs = glob([prefix_dir + "/*.c"]),
+    hdrs = glob([prefix_dir + "/*.h"]),
+    includes = [prefix_dir],
+)