From 6d48337d3f5090bcd630c275f4a8584a3d5b6f72 Mon Sep 17 00:00:00 2001 From: danielyou0230 Date: Mon, 31 Aug 2020 12:23:59 -0700 Subject: [PATCH 1/8] TFLite (tools) added reverse_xxd_dump_from_cc.py --- .../lite/tools/reverse_xxd_dump_from_cc.py | 110 ++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 tensorflow/lite/tools/reverse_xxd_dump_from_cc.py diff --git a/tensorflow/lite/tools/reverse_xxd_dump_from_cc.py b/tensorflow/lite/tools/reverse_xxd_dump_from_cc.py new file mode 100644 index 00000000000..ec5a79a3964 --- /dev/null +++ b/tensorflow/lite/tools/reverse_xxd_dump_from_cc.py @@ -0,0 +1,110 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +r"""This tool reverses xxd dump from *.cc source file to its original binary file + +This script is used to convert models from C++ source file (dumped with xxd) to +the binary model weight file and analyze it with model visualizer like Netron +(https://github.com/lutzroeder/netron) or load the model in TensorFlow Python API +to evaluate the results in Python. 
+ +The command to dump binary file to C++ source file looks like + +xxd -i model_data.tflite > model_data.cc + +Example usage: + +python reverse_xxd_dump_from_cc.py model_data.cc --output=model_data.tflite +""" +import argparse +import os +import re + + +def generate_default_output(filename, postfix=None, extension=None): + """Generate output filename given the filename and extension + + Args: + filename(str): Input filename + postfix(str): Postfix to add to the output filename + extension(str): Output file extension, if not given, it will be + the same as input file. + + Return: + string for the output filename given input args + """ + name, ext = os.path.splitext(filename) + + if extension is not None: + if not extension.startswith("."): + extension = "." + extension + + ext = extension + + if postfix is None: + postfix = "" + + output = "{}{}{}".format(name, postfix, ext) + + return output + + +def reverse_dump(filename, output=None, extension=".tflite"): + """Reverse dump the tensorflow model weight from C++ array source array + + Args: + filename(str): Input filename (the input *.cc file) + output(str): Output filename, default to be same as input file but + with different extension, default extension is *.tflite + """ + if output is None: + output = generate_default_output(filename, extension=extension) + + # Pattern to match with hexadecimal value in the array + pattern = re.compile(r"\W*(0x[0-9a-fA-F,x ]+).*") + + array = bytearray() + with open(filename) as f: + for line in f: + values_match = pattern.match(line) + + if values_match is None: + continue + + # Match in the parentheses (hex array only) + list_text = values_match.group(1) + # Extract hex values (text) + values_text = filter(None, list_text.split(",")) + # Convert to hex + values = [int(x, base=16) for x in values_text] + + array.extend(values) + + with open(output, 'wb') as f: + f.write(array) + + print("Byte data written to `{}`".format(output)) + + +if __name__ == "__main__": + parser = 
argparse.ArgumentParser() + parser.add_argument( + "source", + type=str, + help="C/C++ source file dumped from `xxd -i [HEX_FILE]`") + parser.add_argument("-o", "--output", type=str, help="Output filename") + + args = parser.parse_args() + + reverse_dump(args.source, args.output) From 9cfa6ac763c687231da51b80438326d72b426798 Mon Sep 17 00:00:00 2001 From: danielyou0230 Date: Tue, 1 Sep 2020 10:04:05 -0700 Subject: [PATCH 2/8] TFLite(tools): add reverse_xxd_dump_from_cc to py_binary BUILD target --- tensorflow/lite/tools/BUILD | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tensorflow/lite/tools/BUILD b/tensorflow/lite/tools/BUILD index c29d30e750e..a94aca941c9 100644 --- a/tensorflow/lite/tools/BUILD +++ b/tensorflow/lite/tools/BUILD @@ -84,6 +84,17 @@ py_binary( ], ) +py_binary( + name = "reverse_xxd_dump_from_cc", + srcs = ["reverse_xxd_dump_from_cc.py"], + python_version = "PY3", + srcs_version = "PY2AND3", + deps = [ + ":flatbuffer_utils", + "//tensorflow/python:platform", + ], +) + py_binary( name = "randomize_weights", srcs = ["randomize_weights.py"], From 517060db461e2c39f149a1899c1cca173214c0ce Mon Sep 17 00:00:00 2001 From: danielyou0230 Date: Tue, 1 Sep 2020 10:25:09 -0700 Subject: [PATCH 3/8] TFLite (tools): Factored out reverse_xxd_dump_from_cc to flatbuffer_utils --- tensorflow/lite/tools/flatbuffer_utils.py | 57 +++++++++ .../lite/tools/reverse_xxd_dump_from_cc.py | 109 ++++++------------ 2 files changed, 90 insertions(+), 76 deletions(-) diff --git a/tensorflow/lite/tools/flatbuffer_utils.py b/tensorflow/lite/tools/flatbuffer_utils.py index 3171759201c..b5256ef936c 100644 --- a/tensorflow/lite/tools/flatbuffer_utils.py +++ b/tensorflow/lite/tools/flatbuffer_utils.py @@ -28,6 +28,7 @@ from __future__ import print_function import copy import os import random +import re import flatbuffers from tensorflow.lite.python import schema_py_generated as schema_fb @@ -153,3 +154,59 @@ def randomize_weights(model, random_seed=0): # end up as 
denormalized or NaN/Inf floating point numbers. for j in range(buffer_i_size): buffer_i_data[j] = random.randint(0, 255) + + +def xxd_output_to_bytearray(input_cc_file): + """Converts xxd output C++ source file to bytearray + + Args: + input_cc_file: Full path name to th C++ source file dumped by xxd + + Raises: + RuntimeError: If input_cc_file path is invalid. + IOError: If input_cc_file cannot be opened. + + Returns: + A bytearray corresponding to the input cc file array. + """ + # Match hex values in the string with comma as separator + pattern = re.compile(r"\W*(0x[0-9a-fA-F,x ]+).*") + + model_bytearray = bytearray() + + with open(input_cc_file) as file_handle: + for line in file_handle: + values_match = pattern.match(line) + + if values_match is None: + continue + + # Match in the parentheses (hex array only) + list_text = values_match.group(1) + + # Extract hex values (text) from the line + # e.g. 0x1c, 0x00, 0x00, 0x00, 0x54, 0x46, 0x4c, + values_text = filter(None, list_text.split(",")) + + # Convert to hex + values = [int(x, base=16) for x in values_text] + model_bytearray.extend(values) + + return model_bytearray + + +def xxd_output_to_object(input_cc_file): + """Converts xxd output C++ source file to object + + Args: + input_cc_file: Full path name to th C++ source file dumped by xxd + + Raises: + RuntimeError: If input_cc_file path is invalid. + IOError: If input_cc_file cannot be opened. + + Returns: + A python object corresponding to the input tflite file. 
+ """ + model_bytearray = xxd_output_to_bytearray(input_cc_file) + return convert_bytearray_to_object(model_bytearray) diff --git a/tensorflow/lite/tools/reverse_xxd_dump_from_cc.py b/tensorflow/lite/tools/reverse_xxd_dump_from_cc.py index ec5a79a3964..c6a3c16d78f 100644 --- a/tensorflow/lite/tools/reverse_xxd_dump_from_cc.py +++ b/tensorflow/lite/tools/reverse_xxd_dump_from_cc.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -r"""This tool reverses xxd dump from *.cc source file to its original binary file +r"""Reverses xxd dump from *.cc source file to binary file This script is used to convert models from C++ source file (dumped with xxd) to the binary model weight file and analyze it with model visualizer like Netron @@ -25,86 +25,43 @@ xxd -i model_data.tflite > model_data.cc Example usage: -python reverse_xxd_dump_from_cc.py model_data.cc --output=model_data.tflite +python reverse_xxd_dump_from_cc.py \ + --input_cc_file=model_data.cc \ + --output_tflite_file=model_data.tflite """ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + import argparse -import os -import re +import sys + +from tensorflow.lite.tools import flatbuffer_utils +from tensorflow.python.platform import app -def generate_default_output(filename, postfix=None, extension=None): - """Generate output filename given the filename and extension - - Args: - filename(str): Input filename - postfix(str): Postfix to add to the output filename - extension(str): Output file extension, if not given, it will be - the same as input file. - - Return: - string for the output filename given input args - """ - name, ext = os.path.splitext(filename) - - if extension is not None: - if not extension.startswith("."): - extension = "." 
+ extension - - ext = extension - - if postfix is None: - postfix = "" - - output = "{}{}{}".format(name, postfix, ext) - - return output - - -def reverse_dump(filename, output=None, extension=".tflite"): - """Reverse dump the tensorflow model weight from C++ array source array - - Args: - filename(str): Input filename (the input *.cc file) - output(str): Output filename, default to be same as input file but - with different extension, default extension is *.tflite - """ - if output is None: - output = generate_default_output(filename, extension=extension) - - # Pattern to match with hexadecimal value in the array - pattern = re.compile(r"\W*(0x[0-9a-fA-F,x ]+).*") - - array = bytearray() - with open(filename) as f: - for line in f: - values_match = pattern.match(line) - - if values_match is None: - continue - - # Match in the parentheses (hex array only) - list_text = values_match.group(1) - # Extract hex values (text) - values_text = filter(None, list_text.split(",")) - # Convert to hex - values = [int(x, base=16) for x in values_text] - - array.extend(values) - - with open(output, 'wb') as f: - f.write(array) - - print("Byte data written to `{}`".format(output)) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() +def main(_): + """Application run loop.""" + parser = argparse.ArgumentParser( + description='Reverses xxd dump from to binary file') parser.add_argument( - "source", - type=str, - help="C/C++ source file dumped from `xxd -i [HEX_FILE]`") - parser.add_argument("-o", "--output", type=str, help="Output filename") + '--input_cc_file', + type=str, + required=True, + help='Full path name to the input cc file.') + parser.add_argument( + '--output_tflite_file', + type=str, + required=True, + help='Full path name to the stripped output tflite file.') args = parser.parse_args() - reverse_dump(args.source, args.output) + # Read the model from xxd output C++ source file + model = flatbuffer_utils.xxd_output_to_object(args.input_cc_file) + # Write 
the model + flatbuffer_utils.write_model(model, args.output_tflite_file) + + +if __name__ == '__main__': + app.run(main=main, argv=sys.argv[:1]) From bf0764d49138a770dbadebf9cfc3c56aa8ef641d Mon Sep 17 00:00:00 2001 From: danielyou0230 Date: Tue, 1 Sep 2020 11:36:01 -0700 Subject: [PATCH 4/8] TFLite (tools): renamed xxd_output_to_bytearray to xxd_output_to_bytes --- tensorflow/lite/tools/flatbuffer_utils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/lite/tools/flatbuffer_utils.py b/tensorflow/lite/tools/flatbuffer_utils.py index b5256ef936c..25251ed925d 100644 --- a/tensorflow/lite/tools/flatbuffer_utils.py +++ b/tensorflow/lite/tools/flatbuffer_utils.py @@ -156,8 +156,8 @@ def randomize_weights(model, random_seed=0): buffer_i_data[j] = random.randint(0, 255) -def xxd_output_to_bytearray(input_cc_file): - """Converts xxd output C++ source file to bytearray +def xxd_output_to_bytes(input_cc_file): + """Converts xxd output C++ source file to bytes (immutable) Args: input_cc_file: Full path name to th C++ source file dumped by xxd @@ -192,7 +192,7 @@ def xxd_output_to_bytearray(input_cc_file): values = [int(x, base=16) for x in values_text] model_bytearray.extend(values) - return model_bytearray + return bytes(model_bytearray) def xxd_output_to_object(input_cc_file): @@ -208,5 +208,5 @@ def xxd_output_to_object(input_cc_file): Returns: A python object corresponding to the input tflite file. 
""" - model_bytearray = xxd_output_to_bytearray(input_cc_file) - return convert_bytearray_to_object(model_bytearray) + model_bytes = xxd_output_to_bytes(input_cc_file) + return convert_bytearray_to_object(model_bytes) From c7d3d6eee25a1d074e9cbc6ce63adbcd792a31e9 Mon Sep 17 00:00:00 2001 From: danielyou0230 Date: Tue, 1 Sep 2020 11:43:48 -0700 Subject: [PATCH 5/8] TFLite (tools): added test for xxd_output_to_bytes; fixed styling --- tensorflow/lite/tools/flatbuffer_utils.py | 2 +- .../lite/tools/flatbuffer_utils_test.py | 29 +++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/tensorflow/lite/tools/flatbuffer_utils.py b/tensorflow/lite/tools/flatbuffer_utils.py index 25251ed925d..391132307df 100644 --- a/tensorflow/lite/tools/flatbuffer_utils.py +++ b/tensorflow/lite/tools/flatbuffer_utils.py @@ -158,7 +158,7 @@ def randomize_weights(model, random_seed=0): def xxd_output_to_bytes(input_cc_file): """Converts xxd output C++ source file to bytes (immutable) - + Args: input_cc_file: Full path name to th C++ source file dumped by xxd diff --git a/tensorflow/lite/tools/flatbuffer_utils_test.py b/tensorflow/lite/tools/flatbuffer_utils_test.py index 60235b06bc8..13a63c4f8b0 100644 --- a/tensorflow/lite/tools/flatbuffer_utils_test.py +++ b/tensorflow/lite/tools/flatbuffer_utils_test.py @@ -159,5 +159,34 @@ class RandomizeWeightsTest(test_util.TensorFlowTestCase): self.assertNotEqual(initial_buffer.data[j], final_buffer.data[j]) +class XxdOutputToBytesTest(test_util.TensorFlowTestCase): + + def testXxdOutputToBytes(self): + # 1. SETUP + # Define the initial model + initial_model = test_utils.build_mock_model() + initial_bytes = flatbuffer_utils.convert_object_to_bytearray(initial_model) + + # Define temporary files + tmp_dir = self.get_temp_dir() + model_filename = os.path.join(tmp_dir, 'model.tflite') + + # 2. INVOKE + # Invoke the write_model and read_model functions + flatbuffer_utils.write_model(initial_model, model_filename) + + # 3. 
DUMP WITH xxd + input_cc_file = os.path.join(tmp_dir, 'model.cc') + + command = "xxd -i {} > {}".format(model_filename, input_cc_file) + subprocess.call(command, shell=True) + + # 4. VALIDATE + final_bytes = flatbuffer_utils.xxd_output_to_bytes(input_cc_file) + + # Validate that the initial and final bytearray are the same + self.assertEqual(initial_bytes, final_bytes) + + if __name__ == '__main__': test.main() From 61e4fc8367738776e6a35cc9b5423d201fe8c49a Mon Sep 17 00:00:00 2001 From: danielyou0230 Date: Tue, 1 Sep 2020 11:47:34 -0700 Subject: [PATCH 6/8] TFLite (tools): added comment [immutable] in convert_object_to_bytearray --- tensorflow/lite/tools/flatbuffer_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/lite/tools/flatbuffer_utils.py b/tensorflow/lite/tools/flatbuffer_utils.py index 391132307df..275064f7d4c 100644 --- a/tensorflow/lite/tools/flatbuffer_utils.py +++ b/tensorflow/lite/tools/flatbuffer_utils.py @@ -82,7 +82,7 @@ def read_model_with_mutable_tensors(input_tflite_file): def convert_object_to_bytearray(model_object): - """Converts a tflite model from an object to a bytearray.""" + """Converts a tflite model from an object to an immutable bytearray.""" # Initial size of the buffer, which will grow automatically if needed builder = flatbuffers.Builder(1024) model_offset = model_object.Pack(builder) From 2562470e9a6841d87b221c571546a3ac481ea0cb Mon Sep 17 00:00:00 2001 From: danielyou0230 Date: Tue, 1 Sep 2020 14:21:47 -0700 Subject: [PATCH 7/8] TFLite (tools): improved readability of comments in testXxdOutputToBytes --- tensorflow/lite/tools/flatbuffer_utils_test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/lite/tools/flatbuffer_utils_test.py b/tensorflow/lite/tools/flatbuffer_utils_test.py index 13a63c4f8b0..50a99a6a5e0 100644 --- a/tensorflow/lite/tools/flatbuffer_utils_test.py +++ b/tensorflow/lite/tools/flatbuffer_utils_test.py @@ -171,8 +171,7 @@ class 
XxdOutputToBytesTest(test_util.TensorFlowTestCase): tmp_dir = self.get_temp_dir() model_filename = os.path.join(tmp_dir, 'model.tflite') - # 2. INVOKE - # Invoke the write_model and read_model functions + # 2. Write model to temporary file (will be used as input for xxd) flatbuffer_utils.write_model(initial_model, model_filename) # 3. DUMP WITH xxd From 3a46fe6f4e564ea2ee951a324685f1ae118b624a Mon Sep 17 00:00:00 2001 From: danielyou0230 Date: Tue, 1 Sep 2020 14:27:45 -0700 Subject: [PATCH 8/8] TFLite (tools): added missing module in flatbuffer_utils_test --- tensorflow/lite/tools/flatbuffer_utils_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/lite/tools/flatbuffer_utils_test.py b/tensorflow/lite/tools/flatbuffer_utils_test.py index 50a99a6a5e0..09c89293afe 100644 --- a/tensorflow/lite/tools/flatbuffer_utils_test.py +++ b/tensorflow/lite/tools/flatbuffer_utils_test.py @@ -19,6 +19,7 @@ from __future__ import print_function import copy import os +import subprocess from tensorflow.lite.tools import flatbuffer_utils from tensorflow.lite.tools import test_utils