Document serialization format

This commit is contained in:
Reuben Morais 2019-11-05 09:15:18 +01:00
parent bd6a9d03b1
commit 10c652b420
2 changed files with 9 additions and 0 deletions

View File

@@ -46,6 +46,7 @@ public:
} }
int deserialize(const char* buffer, const int buffer_size) { int deserialize(const char* buffer, const int buffer_size) {
// See util/text.py for an explanation of the serialization format.
int offset = 0; int offset = 0;
if (buffer_size - offset < sizeof(uint16_t)) { if (buffer_size - offset < sizeof(uint16_t)) {
return 1; return 1;

View File

@@ -54,10 +54,18 @@ class Alphabet(object):
return res return res
def serialize(self):
    """Serialize the label table into a compact binary buffer.

    Layout (little-endian, standard sizes, no padding):
      * uint16: number of (key, value) pairs, taken from ``self._size``
      * for each pair in ``self._label_to_str``:
          - uint16 key
          - uint16 length of the encoded label
          - ``length`` bytes holding the UTF-8 encoded label

    Returns:
        bytes: the serialized buffer.

    Raises:
        struct.error: if a key or an encoded label length does not fit in
            an unsigned 16-bit field.
    """
    # Serialization format is a sequence of (key, value) pairs, where key is
    # a uint16_t and value is a uint16_t length followed by `length` UTF-8
    # encoded bytes with the label.
    res = bytearray()

    # We start by writing the number of pairs in the buffer as uint16_t.
    res += struct.pack('<H', self._size)
    for key, value in self._label_to_str.items():
        value = value.encode('utf-8')
        # struct.pack only takes fixed length strings/buffers, so we have to
        # construct the correct format string with the length of the encoded
        # label.
        res += struct.pack('<HH{}s'.format(len(value)), key, len(value), value)
    return bytes(res)