diff --git a/CMakeLists.txt b/CMakeLists.txt
index a5d981745a149c6edca9020a0e10b1b765240f72..3670afe3a05eba0fca55f2e7e4db268035e5b0c1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -30,6 +30,7 @@ set(FlatBuffers_Library_SRCS
   include/flatbuffers/util.h
   include/flatbuffers/reflection.h
   include/flatbuffers/reflection_generated.h
+  include/flatbuffers/flexbuffers.h
   src/code_generators.cpp
   src/idl_parser.cpp
   src/idl_gen_text.cpp
diff --git a/docs/source/FlatBuffers.md b/docs/source/FlatBuffers.md
index d69f404fbe7b6da5c8c058d1af9ba61643368493..d228bd402cd0311b6ac9312790d757c2a462f6b5 100644
--- a/docs/source/FlatBuffers.md
+++ b/docs/source/FlatBuffers.md
@@ -78,6 +78,9 @@ inefficiency, but also forces you to write *more* code to access data
 In this context, it is only a better choice for systems that have very
 little to no information ahead of time about what data needs to be stored.
 
+If you do need to store data that doesn't fit a schema, FlatBuffers also
+offers a schema-less (self-describing) version!
+
 Read more about the "why" of FlatBuffers in the
 [white paper](@ref flatbuffers_white_paper).
 
@@ -138,6 +141,8 @@ sections provide a more in-depth usage guide.
     using FlatBuffers.
 -   A [white paper](@ref flatbuffers_white_paper) explaining the "why" of
     FlatBuffers.
+-   How to use the [schema-less](@ref flexbuffers) version of
+    FlatBuffers.
 -   A description of the [internals](@ref flatbuffers_internals) of FlatBuffers.
 -   A formal [grammar](@ref flatbuffers_grammar) of the schema language.
 
diff --git a/docs/source/FlexBuffers.md b/docs/source/FlexBuffers.md
new file mode 100644
index 0000000000000000000000000000000000000000..bcdd6274c079e5fc46a4dbbc6cdb9348b846eaad
--- /dev/null
+++ b/docs/source/FlexBuffers.md
@@ -0,0 +1,156 @@
+FlexBuffers    {#flexbuffers}
+==========
+
+FlatBuffers was designed around schemas, because when you want maximum
+performance and data consistency, strong typing is helpful.
+
+There are however times when you want to store data that doesn't fit a
+schema, because you can't know ahead of time what all needs to be stored.
+
+For this, FlatBuffers has a dedicated format, called FlexBuffers.
+This is a binary format that can be used in conjunction
+with FlatBuffers (by storing a part of a buffer in FlexBuffers
+format), or as its own independent serialization format.
+
+While you lose the strong typing, you retain the most unique advantage
+FlatBuffers has over other serialization formats (schema-based or not):
+FlexBuffers can also be accessed without parsing / copying / object allocation.
+This is a huge win in efficiency / memory friendliness, and allows unique
+use cases such as mmap-ing large amounts of free-form data.
+
+The FlexBuffers design and implementation allow for a very compact encoding,
+combining automatic pooling of strings with automatic sizing of containers to
+their smallest possible representation (8/16/32/64 bits). Many values and
+offsets can be encoded in just 8 bits. While a schema-less representation is
+usually bulkier because of the need to be self-descriptive, FlexBuffers
+in many cases generates smaller binaries than regular FlatBuffers.
+
+FlexBuffers is still slower than regular FlatBuffers though, so we recommend
+using it only where you need it.
+
+
+# Usage
+
+This is for C++; other languages may follow.
+
+Include the header `flexbuffers.h`, which in turn depends on `flatbuffers.h`
+and `util.h`.
+
+To create a buffer:
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cpp}
+flexbuffers::Builder fbb;
+fbb.Int(13);
+fbb.Finish();
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You create any value, followed by a call to `Finish`. Unlike FlatBuffers,
+which requires the root value to be a table, here any value can be the root,
+including a lonely int value.
+
+You can now access the `std::vector<uint8_t>` that contains the encoded value
+via `fbb.GetBuffer()`. Write it, send it, or store it in a parent FlatBuffer.
+In this case, the buffer is just 3 bytes in size.
+
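+For example, to write it out to a file you could use the `SaveFile` helper
+from `util.h` (a sketch; the file name is just an example):
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cpp}
+const std::vector<uint8_t> &buf = fbb.GetBuffer();
+flatbuffers::SaveFile("data.flex",
+                      reinterpret_cast<const char *>(buf.data()),
+                      buf.size(), /*binary=*/true);
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+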
+To read this value back, you could just say:
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cpp}
+auto root = flexbuffers::GetRoot(my_buffer);
+int64_t i = root.AsInt64();
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+FlexBuffers stores ints only as big as needed, so it doesn't differentiate
+between different sizes of ints. You can ask for the 64-bit version
+regardless of what you put in. In fact, since you ask to read the root
+as an int, if you supply a buffer that actually contains a float, or a
+string with a number in it, it will be converted for you on the fly as well,
+or 0 returned if it can't be. If instead you want to know what is inside
+the buffer before you access it, you can call `root.GetType()`, `root.IsInt()`,
+etc.
+
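+For example (a sketch, reusing `root` from above):
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cpp}
+if (root.IsInt()) {
+  // Stored as some size of (u)int; read it at full width.
+  int64_t i = root.AsInt64();  // 13
+} else if (root.IsFloat()) {
+  double d = root.AsDouble();
+}
+auto type = root.GetType();    // flexbuffers::TYPE_INT
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+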
+Here's a slightly more complex value you could write instead of `fbb.Int` above:
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cpp}
+fbb.Map([&]() {
+  fbb.Vector("vec", [&]() {
+    fbb.Int(-100);
+    fbb.String("Fred");
+    fbb.IndirectFloat(4.0f);
+  });
+  fbb.UInt("foo", 100);
+});
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This stores the equivalent of the JSON value
+`{ vec: [ -100, "Fred", 4.0 ], foo: 100 }`. The root is a dictionary that has
+just two key-value pairs, with keys `vec` and `foo`. Unlike FlatBuffers, it
+actually has to store these keys in the buffer (which it does only once if
+you store multiple such objects, by pooling key values), but also unlike
+FlatBuffers it has no restriction on the keys (fields) that you use.
+
+The map constructor uses a C++11 lambda to group its children, but you can
+also use more conventional start/end calls if you prefer, as shown below.
+
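+For example, the same map written with the start/end style (a sketch, based on
+the `Start*`/`End*` methods in `flexbuffers.h`):
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cpp}
+auto map_start = fbb.StartMap();
+auto vec_start = fbb.StartVector("vec");
+fbb.Int(-100);
+fbb.String("Fred");
+fbb.IndirectFloat(4.0f);
+fbb.EndVector(vec_start, /*typed=*/false, /*fixed=*/false);
+fbb.UInt("foo", 100);
+fbb.EndMap(map_start);
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+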
+The first value in the map is a vector. You'll notice that unlike FlatBuffers,
+you can use mixed types. There is also a `TypedVector` variant that only
+allows a single type, and uses a bit less memory.
+
+`IndirectFloat` is an interesting feature that allows you to store values
+by offset rather than inline. Though that makes no visible difference to
+the user, the consequence is that large values (especially doubles or
+64-bit ints) that occur more than once can be shared. Another use case is
+inside vectors, where the largest element determines the width of all elements
+(e.g. a single double forces all elements to 64 bits), so storing a lot of
+small integers together with a double is more efficient if the double is
+indirect.
+
+Accessing it:
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cpp}
+auto map = flexbuffers::GetRoot(my_buffer).AsMap();
+map.size();  // 2
+auto vec = map["vec"].AsVector();
+vec.size();  // 3
+vec[0].AsInt64();  // -100;
+vec[1].AsString().c_str();  // "Fred";
+vec[1].AsInt64();  // 0 (Number parsing failed).
+vec[2].AsDouble();  // 4.0
+vec[2].AsString().IsTheEmptyString();  // true (Wrong Type).
+vec[2].AsString().c_str();  // "" (This still works though).
+vec[2].ToString().c_str();  // "4" (Or have it converted).
+map["foo"].AsUInt8();  // 100
+map["unknown"].IsNull();  // true
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+# Binary encoding
+
+A description of how FlexBuffers are encoded is in the
+[internals](@ref flatbuffers_internals) document.
+
+
+# Efficiency tips
+
+* Vectors generally are a lot more efficient than maps, so prefer them over maps
+  when possible for small objects. Instead of a map with keys `x`, `y` and `z`,
+  use a vector. Better yet, use a typed vector. Or even better, use a fixed
+  size typed vector.
+* Maps are backwards compatible with vectors, and can be iterated as such.
+  You can iterate either just the values (`map.Values()`), or in parallel with
+  the keys vector (`map.Keys()`). If you intend
+  to access most or all elements, this is faster than looking up each element
+  by key, since that involves a binary search of the key vector.
+* When possible, don't mix values that require a big bit width (such as double)
+  in a large vector of smaller values, since all elements will take on this
+  width. Use `IndirectDouble` when this is a possibility (see the sketch after
+  this list). Note that integers automatically use the smallest width possible,
+  i.e. if you ask to serialize an int64_t whose value is actually small, you
+  will use fewer bits. Doubles are represented as floats whenever that is
+  lossless, which is only the case for few values.
+  Since nested vectors/maps are stored over offsets, they typically don't
+  affect the vector width.
+* To store large arrays of byte data, use a blob. If you'd use a typed
+  vector, the bit width of the size field may make it use more space than
+  expected, and may not be compatible with `memcpy`.
+  Similarly, large arrays of (u)int16_t may be better off stored as a
+  binary blob if their size could exceed 64k elements.
+  Construction and use are otherwise similar to strings.
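+
+As an illustration of the last two tips (a sketch; these are fragments, not a
+complete program):
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cpp}
+// A double mixed into a vector of small ints: store it indirectly so the
+// inline elements can stay 8 bits wide.
+fbb.Vector([&]() {
+  fbb.Int(1);
+  fbb.Int(2);
+  fbb.IndirectDouble(3.14159265358979);
+});
+
+// Large byte data: store it as a blob rather than a typed vector.
+std::vector<uint8_t> bytes(100000);
+fbb.Blob(bytes);
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~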
diff --git a/docs/source/Internals.md b/docs/source/Internals.md
index a7dce4fb0a7e873e8c286a73a8fffe1bd740147a..de7a07e6cddb518679dfb10741f0deaca1ea7add 100755
--- a/docs/source/Internals.md
+++ b/docs/source/Internals.md
@@ -292,4 +292,148 @@ flexibility in which of the children of root object to write first (though in
 this case there's only one string), and what order to write the fields in.
 Different orders may also cause different alignments to happen.
 
+# FlexBuffers
+
+The [schema-less](@ref flexbuffers) version of FlatBuffers has its
+own encoding, detailed here.
+
+It shares many properties mentioned above, in that all data is accessed
+over offsets, all scalars are aligned to their own size, and
+all data is always stored in little endian format.
+
+One difference is that FlexBuffers are built front to back, so children are
+stored before parents, and the root of the data starts at the last byte.
+
+Another difference is that scalar data is stored with a variable number of bits
+(8/16/32/64). The current width is always determined by the *parent*, i.e. if
+the scalar sits in a vector, the vector determines the bit width for all
+elements at once. Selecting the minimum bit width for a particular vector is
+something the encoder does automatically and thus is typically of no concern
+to the user, though being aware of this feature (and not sticking a double in
+the same vector as a bunch of byte sized elements) is helpful for efficiency.
+
+Unlike FlatBuffers there is only one kind of offset, and that is an unsigned
+integer indicating the number of bytes in a negative direction from the address
+of itself (where the offset is stored).
+
+### Vectors
+
+The representation of the vector is at the core of how FlexBuffers works (since
+maps are really just a combination of 2 vectors), so it is worth starting there.
+
+As mentioned, a vector is governed by a single bit width (supplied by its
+parent). This includes the size field. For example, a vector that stores the
+integer values `1, 2, 3` is encoded as follows:
+
+    uint8_t 3, 1, 2, 3, 4, 4, 4
+
+The first `3` is the size field, and is placed before the vector (an offset
+from the parent to this vector points to the first element, not the size
+field, so the size field is effectively at index -1).
+Since this is an untyped vector (`TYPE_VECTOR`), the elements are followed by
+3 type bytes (one per element of the vector), which always come after the
+vector elements, and are always a uint8_t even if the vector is made up of
+bigger scalars.
+
+### Types
+
+A type byte is made up of 2 components (see flexbuffers.h for exact values):
+
+* 2 lower bits representing the bit-width of the child (8, 16, 32, 64).
+  This is only used if the child is accessed over an offset, such as a child
+  vector. It is ignored for inline types.
+* 6 bits representing the actual type (see flexbuffers.h).
+
+Thus, in this example `4` means an 8 bit child (value 0, unused, since the
+value is in-line), of type `TYPE_INT` (value 1).
+
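+In code, a packed type byte is computed as (see `PackedType()` in
+`flexbuffers.h`):
+
+    packed_type = bit_width | (type << 2)  // e.g. BIT_WIDTH_8 | (TYPE_INT << 2) == 4
+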
+### Typed Vectors
+
+These are like the Vectors above, but omit the type bytes. The type is instead
+determined by the vector type supplied by the parent. Typed vectors are only
+available for a subset of types for which these savings can be significant,
+namely inline signed/unsigned integers (`TYPE_VECTOR_INT` / `TYPE_VECTOR_UINT`),
+floats (`TYPE_VECTOR_FLOAT`), and keys and strings (`TYPE_VECTOR_KEY` /
+`TYPE_VECTOR_STRING`, see below).
+
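+For example, the typed-vector version of the `1, 2, 3` vector above would be
+encoded as just:
+
+    uint8_t 3, 1, 2, 3
+
+with the element type carried by the parent's type byte (`TYPE_VECTOR_INT`)
+instead of per-element type bytes.
+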
+Additionally, for scalars, there are fixed length vectors of sizes 2 / 3 / 4
+that don't store the size (`TYPE_VECTOR_INT2` etc.), for additional savings
+in space when storing common vector or color data.
+
+### Scalars
+
+FlexBuffers supports integers (`TYPE_INT` and `TYPE_UINT`) and floats
+(`TYPE_FLOAT`), available in the bit-widths mentioned above. They can be stored
+both inline and over an offset (`TYPE_INDIRECT_*`).
+
+The offset version is useful to encode costly 64bit (or even 32bit) quantities
+into vectors / maps of smaller sizes, and to share / repeat a value multiple
+times.
+
+### Blobs, Strings and Keys.
+
+A blob (`TYPE_BLOB`) is encoded similarly to a vector, with one difference: the
+elements are always `uint8_t`. The parent bit width only determines the width of
+the size field, allowing blobs to be large without the elements being large.
+
+Strings (`TYPE_STRING`) are similar to blobs, except they have an additional 0
+termination byte for convenience, and they MUST be UTF-8 encoded (since an
+accessor in a language that does not support pointers to UTF-8 data may have to
+convert them to a native string type).
+
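+For example, the string `"Fred"` with an 8-bit size field is encoded as:
+
+    uint8_t 4, 'F', 'r', 'e', 'd', 0
+
+where an offset to the string points at the `'F'`, i.e. just past the size
+field.
+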
+A "Key" (`TYPE_KEY`) is similar to a string, but doesn't store the size
+field. They're so named because they are used with maps, which don't care
+for the size, and can thus be even more compact. Unlike strings, keys cannot
+contain bytes of value 0 as part of their data (size can only be determined by
+`strlen`), so while you can use them outside the context of maps if you so
+desire, you're usually better off with strings.
+
+### Maps
+
+A map (`TYPE_MAP`) is like an (untyped) vector, but with 2 prefixes before the
+size field:
+
+| index | field                                                        |
+| ----: | :----------------------------------------------------------- |
+| -3    | An offset to the keys vector (may be shared between maps).   |
+| -2    | Byte width of the keys vector.                               |
+| -1    | Size (from here on it is compatible with `TYPE_VECTOR`)      |
+| 0     | Elements.                                                    |
+| Size  | Types.                                                       |
+
+Since a map is otherwise the same as a vector, it can be iterated like
+a vector (which is probably faster than lookup by key).
+
+The keys vector is a typed vector of keys. Both the keys and corresponding
+values *have* to be stored in sorted order (as determined by `strcmp`), such
+that lookups can be made using binary search.
+
+The reason the key vector is a separate structure from the value vector is
+so that it can be shared between multiple value vectors, and also to
+allow it to be treated as its own individual vector in code.
+
+An example map `{ foo: 13, bar: 14 }` would be encoded as:
+
+    0 : uint8_t 'b', 'a', 'r', 0
+    4 : uint8_t 'f', 'o', 'o', 0
+    8 : uint8_t 2      // key vector of size 2
+    // key vector offset points here
+    9 : uint8_t 9, 6   // offsets to bar_key and foo_key (in sorted key order)
+    11: uint8_t 2, 1   // offset to key vector, and its byte width
+    13: uint8_t 2      // value vector of size 2
+    // value vector offset points here
+    14: uint8_t 14, 13 // values for bar and foo
+    16: uint8_t 4, 4   // types
+
+### The root
+
+As mentioned, the root starts at the end of the buffer.
+The last uint8_t is the width in bytes of the root (normally the parent
+determines the width, but the root has no parent). The uint8_t before this is
+the type of the root, and the bytes before that are the root value (of the
+number of bytes specified by the last byte).
+
+So for example, the integer value `13` as root would be:
+
+    uint8_t 13, 4, 1    // Value, type, root byte width.
+
+
 <br>
diff --git a/docs/source/doxyfile b/docs/source/doxyfile
index db3eeac7151b52f767543bbdbdb905a8e0005766..770da9f2bedfda5e8e444660b497445951103bbb 100755
--- a/docs/source/doxyfile
+++ b/docs/source/doxyfile
@@ -759,6 +759,7 @@ INPUT = "FlatBuffers.md" \
         "Support.md" \
         "Benchmarks.md" \
         "WhitePaper.md" \
+        "FlexBuffers.md" \
         "Internals.md" \
         "Grammar.md" \
         "../../CONTRIBUTING.md" \
diff --git a/docs/source/doxygen_layout.xml b/docs/source/doxygen_layout.xml
index b5d8644c1666bc3bc4d25b4b9cdb63d4e6df1513..77866df3a1974f2668d9902bc596e61289a04977 100644
--- a/docs/source/doxygen_layout.xml
+++ b/docs/source/doxygen_layout.xml
@@ -37,6 +37,8 @@
           title="Use in PHP"/>
       <tab type="user" url="@ref flatbuffers_guide_use_python"
           title="Use in Python"/>
+      <tab type="user" url="@ref flexbuffers"
+          title="Schema-less version"/>
     </tab>
     <tab type="user" url="@ref flatbuffers_support"
         title="Platform / Language / Feature support"/>
diff --git a/include/flatbuffers/flexbuffers.h b/include/flatbuffers/flexbuffers.h
new file mode 100644
index 0000000000000000000000000000000000000000..7f7810fd5f9c52f4a5bd15e672d57cf06afb0e21
--- /dev/null
+++ b/include/flatbuffers/flexbuffers.h
@@ -0,0 +1,1332 @@
+/*
+ * Copyright 2017 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_FLEXBUFFERS_H_
+#define FLATBUFFERS_FLEXBUFFERS_H_
+
+// We use the basic binary writing functions from the regular FlatBuffers.
+#include "flatbuffers/flatbuffers.h"
+#include "flatbuffers/util.h"
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
+namespace flexbuffers {
+
+class Reference;
+class Map;
+
+// These are used in the lower 2 bits of a type field to determine the size of
+// the elements (and/or size field) of the item pointed to (e.g. vector).
+enum BitWidth {
+  BIT_WIDTH_8 = 0,
+  BIT_WIDTH_16 = 1,
+  BIT_WIDTH_32 = 2,
+  BIT_WIDTH_64 = 3,
+};
+
+// These are used as the upper 6 bits of a type field to indicate the actual
+// type.
+enum Type {
+  TYPE_NULL = 0,
+  TYPE_INT = 1,
+  TYPE_UINT = 2,
+  TYPE_FLOAT = 3,
+  // Types above stored inline, types below store an offset.
+  TYPE_KEY = 4,
+  TYPE_STRING = 5,
+  TYPE_INDIRECT_INT = 6,
+  TYPE_INDIRECT_UINT = 7,
+  TYPE_INDIRECT_FLOAT = 8,
+  TYPE_MAP = 9,
+  TYPE_VECTOR = 10,        // Untyped.
+  TYPE_VECTOR_INT = 11,    // Typed any size (stores no type table).
+  TYPE_VECTOR_UINT = 12,
+  TYPE_VECTOR_FLOAT = 13,
+  TYPE_VECTOR_KEY = 14,
+  TYPE_VECTOR_STRING = 15,
+  TYPE_VECTOR_INT2 = 16,   // Typed tuple (no type table, no size field).
+  TYPE_VECTOR_UINT2 = 17,
+  TYPE_VECTOR_FLOAT2 = 18,
+  TYPE_VECTOR_INT3 = 19,   // Typed triple (no type table, no size field).
+  TYPE_VECTOR_UINT3 = 20,
+  TYPE_VECTOR_FLOAT3 = 21,
+  TYPE_VECTOR_INT4 = 22,   // Typed quad (no type table, no size field).
+  TYPE_VECTOR_UINT4 = 23,
+  TYPE_VECTOR_FLOAT4 = 24,
+  TYPE_BLOB = 25,
+};
+
+inline bool IsInline(Type t) { return t <= TYPE_FLOAT; }
+
+inline bool IsTypedVectorElementType(Type t) {
+  return t >= TYPE_INT && t <= TYPE_STRING;
+}
+
+inline bool IsTypedVector(Type t) {
+  return t >= TYPE_VECTOR_INT && t <= TYPE_VECTOR_STRING;
+}
+
+inline bool IsFixedTypedVector(Type t) {
+  return t >= TYPE_VECTOR_INT2 && t <= TYPE_VECTOR_FLOAT4;
+}
+
+inline Type ToTypedVector(Type t, int fixed_len = 0) {
+  assert(IsTypedVectorElementType(t));
+  switch (fixed_len) {
+    case 0: return static_cast<Type>(t - TYPE_INT + TYPE_VECTOR_INT);
+    case 2: return static_cast<Type>(t - TYPE_INT + TYPE_VECTOR_INT2);
+    case 3: return static_cast<Type>(t - TYPE_INT + TYPE_VECTOR_INT3);
+    case 4: return static_cast<Type>(t - TYPE_INT + TYPE_VECTOR_INT4);
+    default: assert(0); return TYPE_NULL;
+  }
+}
+
+inline Type ToTypedVectorElementType(Type t) {
+  assert(IsTypedVector(t));
+  return static_cast<Type>(t - TYPE_VECTOR_INT + TYPE_INT);
+}
+
+inline Type ToFixedTypedVectorElementType(Type t, uint8_t *len) {
+  assert(IsFixedTypedVector(t));
+  auto fixed_type = t - TYPE_VECTOR_INT2;
+  *len = fixed_type / 3 + 2;  // 3 types each, starting from length 2.
+  return static_cast<Type>(fixed_type % 3 + TYPE_INT);
+}
+
+// TODO: implement proper support for 8/16bit floats, or decide not to
+// support them.
+typedef int16_t half;
+typedef int8_t quarter;
+
+// TODO: can we do this without conditionals using intrinsics or inline asm
+// on some platforms? Given branch prediction the method below should be
+// decently quick, but it is the most frequently executed function.
+// We could do an (unaligned) 64-bit read if we ifdef out the platforms for
+// which that doesn't work (or where we'd read into un-owned memory).
+template <typename R, typename T1, typename T2, typename T4, typename T8>
+R ReadSizedScalar(const uint8_t *data, uint8_t byte_width) {
+  return byte_width < 4
+    ? (byte_width < 2 ? static_cast<R>(flatbuffers::ReadScalar<T1>(data))
+                      : static_cast<R>(flatbuffers::ReadScalar<T2>(data)))
+    : (byte_width < 8 ? static_cast<R>(flatbuffers::ReadScalar<T4>(data))
+                      : static_cast<R>(flatbuffers::ReadScalar<T8>(data)));
+}
+
+
+inline int64_t ReadInt64(const uint8_t *data, uint8_t byte_width) {
+  return ReadSizedScalar<int64_t, int8_t, int16_t, int32_t, int64_t>(data,
+           byte_width);
+}
+
+inline uint64_t ReadUInt64(const uint8_t *data, uint8_t byte_width) {
+  // This is the "hottest" function (all offset lookups use this), so worth
+  // optimizing if possible.
+  // TODO: GCC apparently replaces memcpy by a rep movsb, but only if count is a
+  // constant, which here it isn't. Test if memcpy is still faster than
+  // the conditionals in ReadSizedScalar. Can also use inline asm.
+  #ifdef _MSC_VER
+    uint64_t u = 0;
+    __movsb(reinterpret_cast<int8_t *>(&u),
+            reinterpret_cast<const int8_t *>(data), byte_width);
+    return flatbuffers::EndianScalar(u);
+  #else
+    return ReadSizedScalar<uint64_t, uint8_t, uint16_t, uint32_t, uint64_t>(
+             data, byte_width);
+  #endif
+}
+
+inline double ReadDouble(const uint8_t *data, uint8_t byte_width) {
+  return ReadSizedScalar<double, quarter, half, float, double>(data,
+           byte_width);
+}
+
+inline const uint8_t *Indirect(const uint8_t *offset, uint8_t byte_width) {
+  return offset - ReadUInt64(offset, byte_width);
+}
+
+template<typename T> const uint8_t *Indirect(const uint8_t *offset) {
+  return offset - flatbuffers::ReadScalar<T>(offset);
+}
+
+static BitWidth WidthU(uint64_t u) {
+  #define FLATBUFFERS_GET_FIELD_BIT_WIDTH(value, width) { \
+    if (!((value) & ~((1ULL << (width)) - 1ULL))) return BIT_WIDTH_##width; \
+  }
+  FLATBUFFERS_GET_FIELD_BIT_WIDTH(u, 8);
+  FLATBUFFERS_GET_FIELD_BIT_WIDTH(u, 16);
+  FLATBUFFERS_GET_FIELD_BIT_WIDTH(u, 32);
+  #undef FLATBUFFERS_GET_FIELD_BIT_WIDTH
+  return BIT_WIDTH_64;
+}
+
+static BitWidth WidthI(int64_t i) {
+  auto u = static_cast<uint64_t>(i) << 1;
+  return WidthU(i >= 0 ? u : ~u);
+}
+
+static BitWidth WidthF(double f) {
+  return static_cast<double>(static_cast<float>(f)) == f ? BIT_WIDTH_32
+                                                         : BIT_WIDTH_64;
+}
+
+// Base class of all types below.
+// Points into the data buffer and allows access to one type.
+class Object {
+ public:
+  Object(const uint8_t *data, uint8_t byte_width)
+    : data_(data), byte_width_(byte_width) {}
+
+ protected:
+  const uint8_t *data_;
+  uint8_t byte_width_;
+};
+
+// Stores size in `byte_width_` bytes before data_ pointer.
+class Sized : public Object {
+ public:
+  Sized(const uint8_t *data, uint8_t byte_width) : Object(data, byte_width) {}
+  size_t size() const {
+    return static_cast<size_t>(ReadUInt64(data_ - byte_width_, byte_width_));
+  }
+};
+
+class String : public Sized {
+ public:
+  String(const uint8_t *data, uint8_t byte_width)
+    : Sized(data, byte_width) {}
+
+  size_t length() const { return size(); }
+  const char *c_str() const { return reinterpret_cast<const char *>(data_); }
+
+  static String EmptyString() {
+    static const uint8_t empty_string[] = { 0/*len*/, 0/*terminator*/ };
+    return String(empty_string + 1, 1);
+  }
+  bool IsTheEmptyString() const { return data_ == EmptyString().data_; }
+};
+
+class Blob : public Sized {
+ public:
+  Blob(const uint8_t *data, uint8_t byte_width)
+    : Sized(data, byte_width) {}
+
+  static Blob EmptyBlob() {
+    static const uint8_t empty_blob[] = { 0/*len*/ };
+    return Blob(empty_blob + 1, 1);
+  }
+  bool IsTheEmptyBlob() const { return data_ == EmptyBlob().data_; }
+};
+
+class Vector : public Sized {
+ public:
+  Vector(const uint8_t *data, uint8_t byte_width)
+    : Sized(data, byte_width) {}
+
+  Reference operator[](size_t i) const;
+
+  static Vector EmptyVector() {
+    static const uint8_t empty_vector[] = { 0/*len*/ };
+    return Vector(empty_vector + 1, 1);
+  }
+  bool IsTheEmptyVector() const { return data_ == EmptyVector().data_; }
+};
+
+class TypedVector : public Sized {
+ public:
+  TypedVector(const uint8_t *data, uint8_t byte_width, Type element_type)
+    : Sized(data, byte_width), type_(element_type) {}
+
+  Reference operator[](size_t i) const;
+
+  static TypedVector EmptyTypedVector() {
+    static const uint8_t empty_typed_vector[] = { 0/*len*/ };
+    return TypedVector(empty_typed_vector + 1, 1, TYPE_INT);
+  }
+  bool IsTheEmptyVector() const {
+    return data_ == TypedVector::EmptyTypedVector().data_;
+  }
+
+  Type ElementType() { return type_; }
+
+ private:
+  Type type_;
+
+  friend Map;
+};
+
+class FixedTypedVector : public Object {
+ public:
+  FixedTypedVector(const uint8_t *data, uint8_t byte_width, Type element_type,
+                   uint8_t len)
+    : Object(data, byte_width), type_(element_type), len_(len) {}
+
+  Reference operator[](size_t i) const;
+
+  static FixedTypedVector EmptyFixedTypedVector() {
+    static const uint8_t fixed_empty_vector[] = { 0/* unused */ };
+    return FixedTypedVector(fixed_empty_vector, 1, TYPE_INT, 0);
+  }
+  bool IsTheEmptyFixedTypedVector() const {
+    return data_ == FixedTypedVector::EmptyFixedTypedVector().data_;
+  }
+
+  Type ElementType() { return type_; }
+  uint8_t size() { return len_; }
+
+ private:
+  Type type_;
+  uint8_t len_;
+};
+
+class Map : public Vector {
+ public:
+  Map(const uint8_t *data, uint8_t byte_width)
+    : Vector(data, byte_width) {}
+
+  Reference operator[](const char *key) const;
+  Reference operator[](const std::string &key) const;
+
+  Vector Values() const { return Vector(data_, byte_width_); }
+
+  TypedVector Keys() const {
+    const size_t num_prefixed_fields = 3;
+    auto keys_offset = data_ - byte_width_ * num_prefixed_fields;
+    return TypedVector(Indirect(keys_offset, byte_width_),
+                       ReadUInt64(keys_offset + byte_width_, byte_width_),
+                       TYPE_KEY);
+  }
+
+  static Map EmptyMap() {
+    static const uint8_t empty_map[] = {
+      0/*keys_len*/, 0/*keys_offset*/, 1/*keys_width*/, 0/*len*/
+    };
+    return Map(empty_map + 4, 1);
+  }
+
+  bool IsTheEmptyMap() const {
+    return data_ == EmptyMap().data_;
+  }
+};
+
+class Reference {
+ public:
+  Reference(const uint8_t *data, uint8_t parent_width, uint8_t byte_width,
+            Type type)
+    : data_(data), parent_width_(parent_width), byte_width_(byte_width),
+      type_(type) {}
+
+  Reference(const uint8_t *data, uint8_t parent_width, uint8_t packed_type)
+    : data_(data), parent_width_(parent_width) {
+    byte_width_ = 1U << static_cast<BitWidth>(packed_type & 3);
+    type_ = static_cast<Type>(packed_type >> 2);
+  }
+
+  Type GetType() const { return type_; }
+
+  bool IsNull() const { return type_ == TYPE_NULL; }
+  bool IsInt() const { return type_ == TYPE_INT ||
+                              type_ == TYPE_INDIRECT_INT; }
+  bool IsUInt() const { return type_ == TYPE_UINT ||
+                               type_ == TYPE_INDIRECT_UINT; }
+  bool IsIntOrUint() const { return IsInt() || IsUInt(); }
+  bool IsFloat() const { return type_ == TYPE_FLOAT ||
+                                type_ == TYPE_INDIRECT_FLOAT; }
+  bool IsNumeric() const { return IsIntOrUint() || IsFloat(); }
+  bool IsString() const { return type_ == TYPE_STRING; }
+  bool IsKey() const { return type_ == TYPE_KEY; }
+  bool IsVector() const { return type_ == TYPE_VECTOR || type_ == TYPE_MAP; }
+  bool IsMap() const { return type_ == TYPE_MAP; }
+
+  // Reads any type as a int64_t. Never fails, does most sensible conversion.
+  // Truncates floats, strings are attempted to be parsed for a number,
+  // vectors/maps return their size. Returns 0 if all else fails.
+  int64_t AsInt64() const {
+    if (type_ == TYPE_INT) {
+      // A fast path for the common case.
+      return ReadInt64(data_, parent_width_);
+    } else switch (type_) {
+      case TYPE_INDIRECT_INT: return ReadInt64(Indirect(), byte_width_);
+      case TYPE_UINT: return ReadUInt64(data_, parent_width_);
+      case TYPE_INDIRECT_UINT: return ReadUInt64(Indirect(), byte_width_);
+      case TYPE_FLOAT: return static_cast<int64_t>(
+                                ReadDouble(data_, parent_width_));
+      case TYPE_INDIRECT_FLOAT: return static_cast<int64_t>(
+                                         ReadDouble(Indirect(), byte_width_));
+      case TYPE_NULL: return 0;
+      case TYPE_STRING: return flatbuffers::StringToInt(AsString().c_str());
+      case TYPE_VECTOR: return static_cast<int64_t>(AsVector().size());
+      default:
+      // Convert other things to int.
+      return 0;
+    }
+  }
+
+  // TODO: could specialize these to not use AsInt64() if that saves
+  // extension ops in generated code, and use a faster op than ReadInt64.
+  int32_t AsInt32() const { return static_cast<int32_t>(AsInt64()); }
+  int16_t AsInt16() const { return static_cast<int16_t>(AsInt64()); }
+  int8_t  AsInt8()  const { return static_cast<int8_t> (AsInt64()); }
+
+  uint64_t AsUInt64() const {
+    if (type_ == TYPE_UINT) {
+      // A fast path for the common case.
+      return ReadUInt64(data_, parent_width_);
+    } else switch (type_) {
+      case TYPE_INDIRECT_UINT: return ReadUInt64(Indirect(), byte_width_);
+      case TYPE_INT: return ReadInt64(data_, parent_width_);
+      case TYPE_INDIRECT_INT: return ReadInt64(Indirect(), byte_width_);
+      case TYPE_FLOAT: return static_cast<uint64_t>(
+                                ReadDouble(data_, parent_width_));
+      case TYPE_INDIRECT_FLOAT: return static_cast<uint64_t>(
+                                  ReadDouble(Indirect(), byte_width_));
+      case TYPE_NULL: return 0;
+      case TYPE_STRING: return flatbuffers::StringToUInt(AsString().c_str());
+      case TYPE_VECTOR: return static_cast<uint64_t>(AsVector().size());
+      default:
+      // Convert other things to uint.
+      return 0;
+    }
+  }
+
+  uint32_t AsUInt32() const { return static_cast<uint32_t>(AsUInt64()); }
+  uint16_t AsUInt16() const { return static_cast<uint16_t>(AsUInt64()); }
+  uint8_t  AsUInt8()  const { return static_cast<uint8_t> (AsUInt64()); }
+
+  double AsDouble() const {
+    if (type_ == TYPE_FLOAT) {
+      // A fast path for the common case.
+      return ReadDouble(data_, parent_width_);
+    } else switch (type_) {
+      case TYPE_INDIRECT_FLOAT: return ReadDouble(Indirect(), byte_width_);
+      case TYPE_INT: return static_cast<double>(
+                              ReadInt64(data_, parent_width_));
+      case TYPE_UINT: return static_cast<double>(
+                               ReadUInt64(data_, parent_width_));
+      case TYPE_INDIRECT_INT: return static_cast<double>(
+                                       ReadInt64(Indirect(), byte_width_));
+      case TYPE_INDIRECT_UINT: return static_cast<double>(
+                                        ReadUInt64(Indirect(), byte_width_));
+      case TYPE_NULL: return 0.0;
+      case TYPE_STRING: return strtod(AsString().c_str(), nullptr);
+      case TYPE_VECTOR: return static_cast<double>(AsVector().size());
+      default:
+      // Convert strings and other things to float.
+      return 0;
+    }
+  }
+
+  float AsFloat() const { return static_cast<float>(AsDouble()); }
+
+  const char *AsKey() const {
+    if (type_ == TYPE_KEY) {
+      return reinterpret_cast<const char *>(Indirect());
+    } else {
+      return "";
+    }
+  }
+
+  // This function returns the empty string if you try to read a not-string.
+  String AsString() const {
+    if (type_ == TYPE_STRING) {
+      return String(Indirect(), byte_width_);
+    } else {
+      return String::EmptyString();
+    }
+  }
+
+  // Unlike AsString(), this will convert any type to a std::string.
+  std::string ToString() const {
+    if (type_ == TYPE_STRING) {
+      return String(Indirect(), byte_width_).c_str();
+    } else if (IsKey()) {
+      return AsKey();
+    } else if (IsInt()) {
+      return flatbuffers::NumToString(AsInt64());
+    } else if (IsUInt()) {
+      return flatbuffers::NumToString(AsUInt64());
+    } else if (IsFloat()) {
+      return flatbuffers::NumToString(AsDouble());
+    } else if (IsNull()) {
+      return "null";
+    } else if (IsMap()) {
+      return "{..}";  // TODO: show elements.
+    } else if (IsVector()) {
+      return "[..]";  // TODO: show elements.
+    } else {
+      return "(?)";
+    }
+  }
+
+  // This function returns the empty blob if you try to read a not-blob.
+  // Strings can be viewed as blobs too.
+  Blob AsBlob() const {
+    if (type_ == TYPE_BLOB || type_ == TYPE_STRING) {
+      return Blob(Indirect(), byte_width_);
+    } else {
+      return Blob::EmptyBlob();
+    }
+  }
+
+  // This function returns the empty vector if you try to read a not-vector.
+  // Maps can be viewed as vectors too.
+  Vector AsVector() const {
+    if (type_ == TYPE_VECTOR || type_ == TYPE_MAP) {
+      return Vector(Indirect(), byte_width_);
+    } else {
+      return Vector::EmptyVector();
+    }
+  }
+
+  TypedVector AsTypedVector() const {
+    if (IsTypedVector(type_)) {
+      return TypedVector(Indirect(), byte_width_,
+                         ToTypedVectorElementType(type_));
+    } else {
+      return TypedVector::EmptyTypedVector();
+    }
+  }
+
+  FixedTypedVector AsFixedTypedVector() const {
+    if (IsFixedTypedVector(type_)) {
+      uint8_t len = 0;
+      auto vtype = ToFixedTypedVectorElementType(type_, &len);
+      return FixedTypedVector(Indirect(), byte_width_, vtype, len);
+    } else {
+      return FixedTypedVector::EmptyFixedTypedVector();
+    }
+  }
+
+  Map AsMap() const {
+    if (type_ == TYPE_MAP) {
+      return Map(Indirect(), byte_width_);
+    } else {
+      return Map::EmptyMap();
+    }
+  }
+
+  // Experimental: Mutation functions.
+  // These allow scalars in an already created buffer to be updated in-place.
+  // Since by default scalars are stored in the smallest possible space,
+  // the new value may not fit, in which case these functions return false.
+  // To avoid this, you can construct the values you intend to mutate using
+  // Builder::ForceMinimumBitWidth.
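+  // For example (a sketch, assuming a buffer whose root is an int):
+  //   auto root = flexbuffers::GetRoot(my_buffer);
+  //   root.MutateInt(42);  // Returns false if 42 needs more bytes than stored.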
+  bool MutateInt(int64_t i) {
+    if (type_ == TYPE_INT) {
+      return Mutate(data_, i, parent_width_, WidthI(i));
+    } else if (type_ == TYPE_INDIRECT_INT) {
+      return Mutate(Indirect(), i, byte_width_, WidthI(i));
+    } else if (type_ == TYPE_UINT) {
+      auto u = static_cast<uint64_t>(i);
+      return Mutate(data_, u, parent_width_, WidthU(u));
+    } else if (type_ == TYPE_INDIRECT_UINT) {
+      auto u = static_cast<uint64_t>(i);
+      return Mutate(Indirect(), u, byte_width_, WidthU(u));
+    } else {
+      return false;
+    }
+  }
+
+  bool MutateUInt(uint64_t u) {
+    if (type_ == TYPE_UINT) {
+      return Mutate(data_, u, parent_width_, WidthU(u));
+    } else if (type_ == TYPE_INDIRECT_UINT) {
+      return Mutate(Indirect(), u, byte_width_, WidthU(u));
+    } else if (type_ == TYPE_INT) {
+      auto i = static_cast<int64_t>(u);
+      return Mutate(data_, i, parent_width_, WidthI(i));
+    } else if (type_ == TYPE_INDIRECT_INT) {
+      auto i = static_cast<int64_t>(u);
+      return Mutate(Indirect(), i, byte_width_, WidthI(i));
+    } else {
+      return false;
+    }
+  }
+
+  bool MutateFloat(float f) {
+    if (type_ == TYPE_FLOAT) {
+      return MutateF(data_, f, parent_width_, BIT_WIDTH_32);
+    } else if (type_ == TYPE_INDIRECT_FLOAT) {
+      return MutateF(Indirect(), f, byte_width_, BIT_WIDTH_32);
+    } else {
+      return false;
+    }
+  }
+
+  bool MutateFloat(double d) {
+    if (type_ == TYPE_FLOAT) {
+      return MutateF(data_, d, parent_width_, WidthF(d));
+    } else if (type_ == TYPE_INDIRECT_FLOAT) {
+      return MutateF(Indirect(), d, byte_width_, WidthF(d));
+    } else {
+      return false;
+    }
+  }
+
+  bool MutateString(const char *str, size_t len) {
+    auto s = AsString();
+    if (s.IsTheEmptyString()) return false;
+    // This is very strict, could allow shorter strings, but that creates
+    // garbage.
+    if (s.length() != len) return false;
+    memcpy(const_cast<char *>(s.c_str()), str, len);
+    return true;
+  }
+  bool MutateString(const char *str) {
+    return MutateString(str, strlen(str));
+  }
+  bool MutateString(const std::string &str) {
+    return MutateString(str.data(), str.length());
+  }
+
+ private:
+  const uint8_t *Indirect() const {
+    return flexbuffers::Indirect(data_, parent_width_);
+  }
+
+  template<typename T> bool Mutate(const uint8_t *dest, T t, size_t byte_width,
+                                   BitWidth value_width) {
+    auto fits = (1U << value_width) <= byte_width;
+    if (fits) {
+      t = flatbuffers::EndianScalar(t);
+      memcpy(const_cast<uint8_t *>(dest), &t, byte_width);
+    }
+    return fits;
+  }
+
+  template<typename T> bool MutateF(const uint8_t *dest, T t, size_t byte_width,
+                                    BitWidth value_width) {
+    if (byte_width == sizeof(double))
+      return Mutate(dest, static_cast<double>(t), byte_width, value_width);
+    if (byte_width == sizeof(float))
+      return Mutate(dest, static_cast<float>(t), byte_width, value_width);
+    assert(false);
+    return false;
+  }
+
+  const uint8_t *data_;
+  uint8_t parent_width_;
+  uint8_t byte_width_;
+  Type type_;
+};
+
+inline uint8_t PackedType(BitWidth bit_width, Type type) {
+  return static_cast<uint8_t>(bit_width | (type << 2));
+}
+
+inline uint8_t NullPackedType() {
+  return PackedType(BIT_WIDTH_8, TYPE_NULL);
+}
+
+// Vector accessors.
+// Note: if you try to access outside of bounds, you get a Null value back
+// instead. Normally this would be an assert, but since this is "dynamically
+// typed" data, you may not want that (someone sends you a 2d vector and you
+// wanted 3d).
+// The Null converts seamlessly into a default value for any other type.
+// TODO(wvo): Could introduce an #ifdef that makes this into an assert?
+inline Reference Vector::operator[](size_t i) const  {
+  auto len = size();
+  if (i >= len) return Reference(nullptr, 1, NullPackedType());
+  auto packed_type = (data_ + len * byte_width_)[i];
+  auto elem = data_ + i * byte_width_;
+  return Reference(elem, byte_width_, packed_type);
+}
+
+inline Reference TypedVector::operator[](size_t i) const  {
+  auto len = size();
+  if (i >= len) return Reference(nullptr, 1, NullPackedType());
+  auto elem = data_ + i * byte_width_;
+  return Reference(elem, byte_width_, 1, type_);
+}
+
+inline Reference FixedTypedVector::operator[](size_t i) const  {
+  if (i >= len_) return Reference(nullptr, 1, NullPackedType());
+  auto elem = data_ + i * byte_width_;
+  return Reference(elem, byte_width_, 1, type_);
+}
+
+template<typename T> int KeyCompare(const void *key, const void *elem) {
+  auto str_elem = reinterpret_cast<const char *>(
+                    Indirect<T>(reinterpret_cast<const uint8_t *>(elem)));
+  auto skey = reinterpret_cast<const char *>(key);
+  return strcmp(skey, str_elem);
+}
+
+inline Reference Map::operator[](const char *key) const {
+  auto keys = Keys();
+  // We can't pass keys.byte_width_ to the comparison function, so we have
+  // to pick the right one ahead of time.
+  int (*comp)(const void *, const void *) = nullptr;
+  switch (keys.byte_width_) {
+    case 1: comp = KeyCompare<uint8_t>; break;
+    case 2: comp = KeyCompare<uint16_t>; break;
+    case 4: comp = KeyCompare<uint32_t>; break;
+    case 8: comp = KeyCompare<uint64_t>; break;
+  }
+  auto res = std::bsearch(key, keys.data_, keys.size(), keys.byte_width_, comp);
+  if (!res)
+    return Reference(nullptr, 1, NullPackedType());
+  auto i = (reinterpret_cast<uint8_t *>(res) - keys.data_) / keys.byte_width_;
+  return (*static_cast<const Vector *>(this))[i];
+}
+
+inline Reference Map::operator[](const std::string &key) const {
+  return (*this)[key.c_str()];
+}
+
+inline Reference GetRoot(const uint8_t *buffer, size_t size) {
+  // See Finish() below for the serialization counterpart of this.
+  // The root starts at the end of the buffer, so we parse backwards from there.
+  auto end = buffer + size;
+  auto byte_width = *--end;
+  auto packed_type = *--end;
+  end -= byte_width;  // The root data item.
+  return Reference(end, byte_width, packed_type);
+}
+
+inline Reference GetRoot(const std::vector<uint8_t> &buffer) {
+  return GetRoot(buffer.data(), buffer.size());
+}
+
+// Flags that configure how the Builder behaves.
+// The "Share" flags determine if the Builder automatically tries to pool
+// this type. Pooling can reduce the size of serialized data if there are
+// multiple maps of the same kind, at the expense of slightly slower
+// serialization (the cost of lookups) and more memory use (std::set).
+// By default this is on for keys, but off for strings.
+// Turn keys off if you have e.g. only one map.
+// Turn strings on if you expect many non-unique string values.
+// Additionally, sharing key vectors can save space if you have maps with
+// identical field populations.
+enum BuilderFlag {
+  BUILDER_FLAG_NONE = 0,
+  BUILDER_FLAG_SHARE_KEYS = 1,
+  BUILDER_FLAG_SHARE_STRINGS = 2,
+  BUILDER_FLAG_SHARE_KEYS_AND_STRINGS = 3,
+  BUILDER_FLAG_SHARE_KEY_VECTORS = 4,
+  BUILDER_FLAG_SHARE_ALL = 7,
+};
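+// For example (a sketch):
+//   flexbuffers::Builder fbb(512,
+//                            flexbuffers::BUILDER_FLAG_SHARE_KEYS_AND_STRINGS);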
+
+class Builder FLATBUFFERS_FINAL_CLASS {
+ public:
+  Builder(size_t initial_size = 256,
+          BuilderFlag flags = BUILDER_FLAG_SHARE_KEYS)
+      : buf_(initial_size), finished_(false), flags_(flags),
+        force_min_bit_width_(BIT_WIDTH_8), key_pool(KeyOffsetCompare(buf_)),
+        string_pool(StringOffsetCompare(buf_)) {
+    buf_.clear();
+  }
+
+  /// @brief Get the serialized buffer (after you call `Finish()`).
+  /// @return Returns a vector owned by this class.
+  const std::vector<uint8_t> &GetBuffer() const {
+    Finished();
+    return buf_;
+  }
+
+  // All value constructing functions below have two versions: one that
+  // takes a key (for placement inside a map) and one that doesn't (for inside
+  // vectors and elsewhere).
+
+  void Null() { stack_.push_back(Value()); }
+  void Null(const char *key) { Key(key); Null(); }
+
+  void Int(int64_t i) { stack_.push_back(Value(i, TYPE_INT, WidthI(i))); }
+  void Int(const char *key, int64_t i) { Key(key); Int(i); }
+
+  void UInt(uint64_t u) { stack_.push_back(Value(u, TYPE_UINT, WidthU(u))); }
+  void UInt(const char *key, uint64_t u) { Key(key); UInt(u); }
+
+  void Float(float f) { stack_.push_back(Value(f)); }
+  void Float(const char *key, float f) { Key(key); Float(f); }
+
+  void Double(double f) { stack_.push_back(Value(f)); }
+  void Double(const char *key, double d) { Key(key); Double(d); }
+
+  void Bool(bool b) { Int(static_cast<int64_t>(b)); }
+  void Bool(const char *key, bool b) { Key(key); Bool(b); }
+
+  void IndirectInt(int64_t i) {
+    PushIndirect(i, TYPE_INDIRECT_INT, WidthI(i));
+  }
+  void IndirectInt(const char *key, int64_t i) {
+    Key(key);
+    IndirectInt(i);
+  }
+
+  void IndirectUInt(uint64_t u) {
+    PushIndirect(u, TYPE_INDIRECT_UINT, WidthU(u));
+  }
+  void IndirectUInt(const char *key, uint64_t u) {
+    Key(key);
+    IndirectUInt(u);
+  }
+
+  void IndirectFloat(float f) {
+    PushIndirect(f, TYPE_INDIRECT_FLOAT, BIT_WIDTH_32);
+  }
+  void IndirectFloat(const char *key, float f) {
+    Key(key);
+    IndirectFloat(f);
+  }
+
+  void IndirectDouble(double f) {
+    PushIndirect(f, TYPE_INDIRECT_FLOAT, WidthF(f));
+  }
+  void IndirectDouble(const char *key, double d) {
+    Key(key);
+    IndirectDouble(d);
+  }
+
+  size_t Key(const char *str, size_t len) {
+    auto sloc = buf_.size();
+    WriteBytes(str, len + 1);
+    if (flags_ & BUILDER_FLAG_SHARE_KEYS) {
+      auto it = key_pool.find(sloc);
+      if (it != key_pool.end()) {
+        // Already in the buffer. Remove key we just serialized, and use
+        // existing offset instead.
+        buf_.resize(sloc);
+        sloc = *it;
+      } else {
+        key_pool.insert(sloc);
+      }
+    }
+    stack_.push_back(Value(static_cast<uint64_t>(sloc), TYPE_KEY, BIT_WIDTH_8));
+    return sloc;
+  }
+
+  size_t Key(const char *str) { return Key(str, strlen(str)); }
+  size_t Key(const std::string &str) { return Key(str.c_str(), str.size()); }
+
+  size_t String(const char *str, size_t len) {
+    auto reset_to = buf_.size();
+    auto sloc = CreateBlob(str, len, 1, TYPE_STRING);
+    if (flags_ & BUILDER_FLAG_SHARE_STRINGS) {
+      StringOffset so(sloc, len);
+      auto it = string_pool.find(so);
+      if (it != string_pool.end()) {
+        // Already in the buffer. Remove string we just serialized, and use
+        // existing offset instead.
+        buf_.resize(reset_to);
+        sloc = it->first;
+        stack_.back().u_ = sloc;
+      } else {
+        string_pool.insert(so);
+      }
+    }
+    return sloc;
+  }
+  size_t String(const char *str) {
+    return String(str, strlen(str));
+  }
+  size_t String(const std::string &str) {
+    return String(str.c_str(), str.size());
+  }
+  void String(const flexbuffers::String &str) {
+    String(str.c_str(), str.length());
+  }
+
+  void String(const char *key, const char *str) {
+    Key(key);
+    String(str);
+  }
+  void String(const char *key, const std::string &str) {
+    Key(key);
+    String(str);
+  }
+  void String(const char *key, const flexbuffers::String &str) {
+    Key(key);
+    String(str);
+  }
+
+  size_t Blob(const void *data, size_t len) {
+    return CreateBlob(data, len, 0, TYPE_BLOB);
+  }
+  size_t Blob(const std::vector<uint8_t> &v) {
+    return CreateBlob(v.data(), v.size(), 0, TYPE_BLOB);
+  }
+
+  // TODO(wvo): support all the FlexBuffer types (like flexbuffers::String),
+  // e.g. Vector etc. Also in overloaded versions.
+  // Also some FlatBuffers types?
+
+  size_t StartVector() { return stack_.size(); }
+  size_t StartVector(const char *key) { Key(key); return stack_.size(); }
+  size_t StartMap() { return stack_.size(); }
+  size_t StartMap(const char *key) { Key(key); return stack_.size(); }
+
+  // TODO(wvo): allow this to specify an alignment greater than the natural
+  // alignment.
+  size_t EndVector(size_t start, bool typed, bool fixed) {
+    auto vec = CreateVector(start, stack_.size() - start, 1, typed, fixed);
+    // Remove temp elements and return vector.
+    stack_.resize(start);
+    stack_.push_back(vec);
+    return vec.u_;
+  }
+
+  size_t EndMap(size_t start) {
+    // We should have interleaved keys and values on the stack.
+    // Make sure it is an even number:
+    auto len = stack_.size() - start;
+    assert(!(len & 1));
+    len /= 2;
+    // Make sure keys are all strings:
+    for (auto key = start; key < stack_.size(); key += 2) {
+      assert(stack_[key].type_ == TYPE_KEY);
+    }
+    // Now sort values, so later we can do a binary search lookup.
+    // We want to sort 2 array elements at a time.
+    struct TwoValue { Value key; Value val; };
+    // TODO(wvo): strict aliasing?
+    // TODO(wvo): allow the caller to indicate the data is already sorted
+    // for maximum efficiency? With an assert to check sortedness to make sure
+    // we're not breaking binary search.
+    // Or, we can track if the map is sorted as keys are added, which would
+    // be quite cheap (cheaper than checking it here), so we can skip this
+    // step automatically when applicable, and encourage people to write in
+    // sorted fashion.
+    // std::sort is typically already a lot faster on sorted data though.
+    auto dict = reinterpret_cast<TwoValue *>(stack_.data() + start);
+    std::sort(dict, dict + len, [&](const TwoValue &a, const TwoValue &b) {
+      auto as = reinterpret_cast<const char *>(buf_.data() + a.key.u_);
+      auto bs = reinterpret_cast<const char *>(buf_.data() + b.key.u_);
+      auto comp = strcmp(as, bs);
+      // If this assertion hits, you've added two keys with the same value to
+      // this map.
+      assert(comp);
+      return comp < 0;
+    });
+    // First create a vector out of all keys.
+    // TODO(wvo): if BUILDER_FLAG_SHARE_KEY_VECTORS is set, see if we can share
+    // the first vector.
+    auto keys = CreateVector(start, len, 2, true, false);
+    auto vec = CreateVector(start + 1, len, 2, false, false, &keys);
+    // Remove temp elements and return map.
+    stack_.resize(start);
+    stack_.push_back(vec);
+    return vec.u_;
+  }
+
+  template<typename F> size_t Vector(F f) {
+    auto start = StartVector();
+    f();
+    return EndVector(start, false, false);
+  }
+  template<typename F> size_t Vector(const char *key, F f) {
+    auto start = StartVector(key);
+    f();
+    return EndVector(start, false, false);
+  }
+  template<typename T> void Vector(const T *elems, size_t len) {
+    if (std::is_scalar<T>::value) {
+      // This path should be a lot quicker and use less space.
+      ScalarVector(elems, len, false);
+    } else {
+      auto start = StartVector();
+      for (size_t i = 0; i < len; i++) Add(elems[i]);
+      EndVector(start, false, false);
+    }
+  }
+  template<typename T> void Vector(const std::vector<T> &vec) {
+    Vector(vec.data(), vec.size());
+  }
+
+  template<typename F> size_t TypedVector(F f) {
+    auto start = StartVector();
+    f();
+    return EndVector(start, true, false);
+  }
+  template<typename F> size_t TypedVector(const char *key, F f) {
+    auto start = StartVector(key);
+    f();
+    return EndVector(start, true, false);
+  }
+
+  template<typename T> size_t FixedTypedVector(const T *elems, size_t len) {
+    // We only support a few fixed vector lengths. Anything bigger use a
+    // regular typed vector.
+    assert(len >= 2 && len <= 4);
+    // And only scalar values.
+    assert(std::is_scalar<T>::value);
+    return ScalarVector(elems, len, true);
+  }
+
+  template<typename T> size_t FixedTypedVector(const char *key, const T *elems,
+                                               size_t len) {
+    Key(key);
+    return FixedTypedVector(elems, len);
+  }
+
+  template<typename F> size_t Map(F f) {
+    auto start = StartMap();
+    f();
+    return EndMap(start);
+  }
+  template<typename F> size_t Map(const char *key, F f) {
+    auto start = StartMap(key);
+    f();
+    return EndMap(start);
+  }
+  template<typename T> void Map(const std::map<std::string, T> &map) {
+    auto start = StartMap();
+    for (auto it = map.begin(); it != map.end(); ++it)
+      Add(it->first.c_str(), it->second);
+    EndMap(start);
+  }
+
+  // Overloaded Add that tries to call the correct function above.
+  void Add(int8_t i) { Int(i); }
+  void Add(int16_t i) { Int(i); }
+  void Add(int32_t i) { Int(i); }
+  void Add(int64_t i) { Int(i); }
+  void Add(uint8_t u) { UInt(u); }
+  void Add(uint16_t u) { UInt(u); }
+  void Add(uint32_t u) { UInt(u); }
+  void Add(uint64_t u) { UInt(u); }
+  void Add(float f) { Float(f); }
+  void Add(double d) { Double(d); }
+  void Add(bool b) { Bool(b); }
+  void Add(const char *str) { String(str); }
+  void Add(const std::string &str) { String(str); }
+  void Add(const flexbuffers::String &str) { String(str); }
+
+  template<typename T> void Add(const std::vector<T> &vec) {
+    Vector(vec);
+  }
+
+  template<typename T> void Add(const char *key, const T &t) {
+    Key(key);
+    Add(t);
+  }
+
+  template<typename T> void Add(const std::map<std::string, T> &map) {
+    Map(map);
+  }
+
+  template<typename T> void operator+=(const T &t) {
+    Add(t);
+  }
+
+  // This function is useful in combination with the Mutate* functions above.
+  // It forces elements of vectors and maps to have a minimum size, such that
+  // they can later be updated without failing.
+  // Call with no arguments to reset.
+  void ForceMinimumBitWidth(BitWidth bw = BIT_WIDTH_8) {
+    force_min_bit_width_ = bw;
+  }
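+  // For example (a sketch):
+  //   fbb.ForceMinimumBitWidth(flexbuffers::BIT_WIDTH_32);
+  //   // Elements of vectors/maps built after this call take at least 32 bits,
+  //   // so MutateInt() etc. can later store any 32-bit value in them.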
+
+  void Finish() {
+    // If you hit this assert, you likely have objects that were never included
+    // in a parent. You need to have exactly one root to finish a buffer.
+    // Check your Start/End calls are matched, and all objects are inside
+    // some other object.
+    assert(stack_.size() == 1);
+
+    // Write root value.
+    auto byte_width = Align(stack_[0].ElemWidth(buf_.size(), 0));
+    WriteAny(stack_[0], byte_width);
+    // Write root type.
+    Write(stack_[0].StoredPackedType(), 1);
+    // Write root byte width. Normally determined by parent, but root has no parent :)
+    Write(byte_width, 1);
+
+    finished_ = true;
+  }
+
+ private:
+  void Finished() const {
+    // If you get this assert, you're attempting to access a buffer
+    // which hasn't been finished yet. Be sure to call
+    // Builder::Finish with your root object.
+    assert(finished_);
+  }
+
+  // Align to prepare for writing a scalar with a certain size.
+  uint8_t Align(BitWidth alignment) {
+    auto byte_width = 1U << alignment;
+    buf_.insert(buf_.end(), flatbuffers::PaddingBytes(buf_.size(), byte_width),
+                0);
+    return byte_width;
+  }
+
+  void WriteBytes(const void *val, size_t size) {
+    buf_.insert(buf_.end(),
+                reinterpret_cast<const uint8_t *>(val),
+                reinterpret_cast<const uint8_t *>(val) + size);
+  }
+
+  // For values of a type T whose size is >= byte_width.
+  template<typename T> void Write(T val, uint8_t byte_width) {
+    val = flatbuffers::EndianScalar(val);
+    WriteBytes(&val, byte_width);
+  }
+
+  void WriteDouble(double f, uint8_t byte_width) {
+    switch (byte_width) {
+      case 8: Write(f, byte_width); break;
+      case 4: Write(static_cast<float>(f), byte_width); break;
+      //case 2: Write(static_cast<half>(f), byte_width); break;
+      //case 1: Write(static_cast<quarter>(f), byte_width); break;
+      default: assert(0);
+    }
+  }
+
+  void WriteOffset(uint64_t o, uint8_t byte_width) {
+    auto reloff = buf_.size() - o;
+    assert(reloff < 1ULL << (byte_width * 8) || byte_width == 8);
+    Write(reloff, byte_width);
+  }
+
+  template<typename T> void PushIndirect(T val, Type type, BitWidth bit_width) {
+    auto byte_width = Align(bit_width);
+    auto iloc = buf_.size();
+    Write(val, byte_width);
+    stack_.push_back(Value(static_cast<uint64_t>(iloc), type, bit_width));
+  }
+
+  static BitWidth WidthB(size_t byte_width) {
+    switch (byte_width) {
+      case 1: return BIT_WIDTH_8;
+      case 2: return BIT_WIDTH_16;
+      case 4: return BIT_WIDTH_32;
+      case 8: return BIT_WIDTH_64;
+      default: assert(false); return BIT_WIDTH_64;
+    }
+  }
+
+  template<typename T> static Type GetScalarType() {
+    assert(std::is_scalar<T>::value);
+    return std::is_floating_point<T>::value
+        ? TYPE_FLOAT
+        : (std::is_unsigned<T>::value ? TYPE_UINT : TYPE_INT);
+  }
+
+  struct Value {
+    union {
+      int64_t i_;
+      uint64_t u_;
+      double f_;
+    };
+
+    Type type_;
+
+    // For scalars: of itself, for vector: of its elements, for string: length.
+    BitWidth min_bit_width_;
+
+    Value() : i_(0), type_(TYPE_NULL), min_bit_width_(BIT_WIDTH_8) {}
+
+    Value(int64_t i, Type t, BitWidth bw)
+      : i_(i), type_(t), min_bit_width_(bw) {}
+    Value(uint64_t u, Type t, BitWidth bw)
+      : u_(u), type_(t), min_bit_width_(bw) {}
+
+    Value(float f)
+      : f_(f), type_(TYPE_FLOAT), min_bit_width_(BIT_WIDTH_32) {}
+    Value(double f)
+      : f_(f), type_(TYPE_FLOAT), min_bit_width_(WidthF(f)) {}
+
+    uint8_t StoredPackedType(BitWidth parent_bit_width_= BIT_WIDTH_8) const {
+      return PackedType(StoredWidth(parent_bit_width_), type_);
+    }
+
+    BitWidth ElemWidth(size_t buf_size, size_t elem_index) const {
+      if (IsInline(type_)) {
+        return min_bit_width_;
+      } else {
+        // We have an absolute offset, but want to store a relative offset
+        // elem_index elements beyond the current buffer end. Since whether
+        // the relative offset fits in a certain byte_width depends on
+        // the size of the elements before it (and their alignment), we have
+        // to test for each size in turn.
+        for (size_t byte_width = 1;
+             byte_width <= sizeof(flatbuffers::largest_scalar_t);
+             byte_width *= 2) {
+          // Where are we going to write this offset?
+          auto offset_loc =
+            buf_size +
+            flatbuffers::PaddingBytes(buf_size, byte_width) +
+            elem_index * byte_width;
+          // Compute relative offset.
+          auto offset = offset_loc - u_;
+          // Does it fit?
+          auto bit_width = WidthU(offset);
+          if (1U << bit_width == byte_width) return bit_width;
+        }
+        assert(false);  // Must match one of the sizes above.
+        return BIT_WIDTH_64;
+      }
+    }
+
+    BitWidth StoredWidth(BitWidth parent_bit_width_ = BIT_WIDTH_8) const {
+      if (IsInline(type_)) {
+        return std::max(min_bit_width_, parent_bit_width_);
+      } else {
+        return min_bit_width_;
+      }
+    }
+  };
+
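+  // Write a stack value at the given width: inline types are written
+  // directly, everything else as a relative offset to existing data.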
+  void WriteAny(const Value &val, uint8_t byte_width) {
+    switch (val.type_) {
+      case TYPE_NULL:
+      case TYPE_INT:
+        Write(val.i_, byte_width);
+        break;
+      case TYPE_UINT:
+        Write(val.u_, byte_width);
+        break;
+      case TYPE_FLOAT:
+        WriteDouble(val.f_, byte_width);
+        break;
+      default:
+        WriteOffset(val.u_, byte_width);
+        break;
+    }
+  }
+
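+  // Blobs and strings are written as a length prefix followed by the raw
+  // bytes; trailing covers any extra bytes, such as a string's 0-terminator.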
+  size_t CreateBlob(const void *data, size_t len, size_t trailing, Type type) {
+    auto bit_width = WidthU(len);
+    auto byte_width = Align(bit_width);
+    Write<uint64_t>(len, byte_width);
+    auto sloc = buf_.size();
+    WriteBytes(data, len + trailing);
+    stack_.push_back(Value(static_cast<uint64_t>(sloc), type, bit_width));
+    return sloc;
+  }
+
+  template<typename T> size_t ScalarVector(const T *elems, size_t len,
+                                           bool fixed) {
+    auto vector_type = GetScalarType<T>();
+    auto byte_width = sizeof(T);
+    auto bit_width = WidthB(byte_width);
+    // If you get this assert, you're trying to write a vector with a size
+    // field that is bigger than the scalars you're trying to write (e.g. a
+    // byte vector > 255 elements). For such types, write a "blob" instead.
+    // TODO: instead of asserting, we could write a vector with larger
+    // elements, though that would be wasteful.
+    assert(WidthU(len) <= bit_width);
+    // Write the length through an explicit 64-bit type: byte_width is
+    // sizeof(T), which may exceed sizeof(size_t) on 32-bit platforms.
+    if (!fixed) Write<uint64_t>(len, byte_width);
+    auto vloc = buf_.size();
+    for (size_t i = 0; i < len; i++) Write(elems[i], byte_width);
+    stack_.push_back(Value(static_cast<uint64_t>(vloc),
+                           ToTypedVector(vector_type, fixed ? len : 0),
+                           bit_width));
+    return vloc;
+  }
+
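+  // Write out the elements stack_[start], stack_[start + step], ... as a
+  // vector: optional keys offset/width (for maps), optional length, the
+  // elements themselves, and one packed type byte per element if untyped.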
+  Value CreateVector(size_t start, size_t vec_len, size_t step, bool typed,
+                     bool fixed, const Value *keys = nullptr) {
+    // Figure out smallest bit width we can store this vector with.
+    auto bit_width = std::max(force_min_bit_width_, WidthU(vec_len));
+    auto prefix_elems = 1;
+    if (keys) {
+      // If this vector is part of a map, we prefix it with an offset to the
+      // keys vector (followed by its byte width).
+      bit_width = std::max(bit_width, keys->ElemWidth(buf_.size(), 0));
+      prefix_elems += 2;
+    }
+    Type vector_type = TYPE_KEY;
+    // Check bit widths and types for all elements.
+    for (size_t i = start; i < stack_.size(); i += step) {
+      auto elem_width = stack_[i].ElemWidth(buf_.size(), i + prefix_elems);
+      bit_width = std::max(bit_width, elem_width);
+      if (typed) {
+        if (i == start) {
+          vector_type = stack_[i].type_;
+        } else {
+          // If you get this assert, you are writing a typed vector with
+          // elements that are not all the same type.
+          assert(vector_type == stack_[i].type_);
+        }
+      }
+    }
+    // If you get this assert, the elements of your typed vector are not one
+    // of: Int / UInt / Float / Key.
+    assert(IsTypedVectorElementType(vector_type));
+    auto byte_width = Align(bit_width);
+    // Write the vector: first the keys offset/width if present, then the size.
+    if (keys) {
+      WriteOffset(keys->u_, byte_width);
+      // Use a 64-bit type here: byte_width may be wider than unsigned int.
+      Write<uint64_t>(1ULL << keys->min_bit_width_, byte_width);
+    }
+    if (!fixed) Write<uint64_t>(vec_len, byte_width);
+    // Then the actual data.
+    auto vloc = buf_.size();
+    for (size_t i = start; i < stack_.size(); i += step) {
+      WriteAny(stack_[i], byte_width);
+    }
+    // Then the types.
+    if (!typed) {
+      for (size_t i = start; i < stack_.size(); i += step) {
+        buf_.push_back(stack_[i].StoredPackedType(bit_width));
+      }
+    }
+    return Value(static_cast<uint64_t>(vloc), keys
+                         ? TYPE_MAP
+                         : (typed
+                            ? ToTypedVector(vector_type, fixed ? vec_len : 0)
+                            : TYPE_VECTOR),
+                       bit_width);
+  }
+
+  // You shouldn't really be copying instances of this class.
+  Builder(const Builder &);
+  Builder &operator=(const Builder &);
+
+  std::vector<uint8_t> buf_;
+  std::vector<Value> stack_;
+
+  bool finished_;
+
+  BuilderFlag flags_;
+
+  BitWidth force_min_bit_width_;
+
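+  // These comparators order offsets by the string bytes they point at inside
+  // buf_, which lets the pools below detect and share identical keys and
+  // strings when the corresponding BUILDER_FLAG_SHARE_* flags are set.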
+  struct KeyOffsetCompare {
+    KeyOffsetCompare(const std::vector<uint8_t> &buf) : buf_(&buf) {}
+    bool operator() (size_t a, size_t b) const {
+      auto stra = reinterpret_cast<const char *>(buf_->data() + a);
+      auto strb = reinterpret_cast<const char *>(buf_->data() + b);
+      return strcmp(stra, strb) < 0;
+    }
+    const std::vector<uint8_t> *buf_;
+  };
+
+  typedef std::pair<size_t, size_t> StringOffset;
+  struct StringOffsetCompare {
+    StringOffsetCompare(const std::vector<uint8_t> &buf) : buf_(&buf) {}
+    bool operator() (const StringOffset &a, const StringOffset &b) const {
+      auto stra = reinterpret_cast<const char *>(buf_->data() + a.first);
+      auto strb = reinterpret_cast<const char *>(buf_->data() + b.first);
+      return strncmp(stra, strb, std::min(a.second, b.second) + 1) < 0;
+    }
+    const std::vector<uint8_t> *buf_;
+  };
+
+  typedef std::set<size_t, KeyOffsetCompare> KeyOffsetMap;
+  typedef std::set<StringOffset, StringOffsetCompare> StringOffsetMap;
+
+  KeyOffsetMap key_pool;
+  StringOffsetMap string_pool;
+};
+
+}  // namespace flexbuffers
+
+#endif  // FLATBUFFERS_FLEXBUFFERS_H_
diff --git a/tests/test.cpp b/tests/test.cpp
index 34841047e4447a53c07f4d6b5290f30e15edec11..8ffb2c117c15b3fa3c764408a035f37622157873 100644
--- a/tests/test.cpp
+++ b/tests/test.cpp
@@ -27,6 +27,8 @@
   #include <random>
 #endif
 
+#include "flatbuffers/flexbuffers.h"
+
 using namespace MyGame::Example;
 
 #ifdef __ANDROID__
@@ -491,8 +493,6 @@ void ReflectionTest(uint8_t *flatbuf, size_t length) {
   TEST_NOTNULL(pos_table_ptr);
   TEST_EQ_STR(pos_table_ptr->name()->c_str(), "MyGame.Example.Vec3");
 
-
-
   // Now use it to dynamically access a buffer.
   auto &root = *flatbuffers::GetAnyRoot(flatbuf);
 
@@ -1360,6 +1360,66 @@ void ConformTest() {
   test_conform("enum E:byte { B, A }", "values differ for enum");
 }
 
+void FlexBuffersTest() {
+  flexbuffers::Builder slb(512,
+                           flexbuffers::BUILDER_FLAG_SHARE_KEYS_AND_STRINGS);
+
+  // Write the equivalent of:
+  // { vec: [ -100, "Fred", 4.0 ], bar: [ 1, 2, 3 ], foo: 100 }
+  slb.Map([&]() {
+    slb.Vector("vec", [&]() {
+      slb += -100;  // Equivalent to slb.Add(-100) or slb.Int(-100);
+      slb += "Fred";
+      slb.IndirectFloat(4.0f);
+    });
+    std::vector<int> ints = { 1, 2, 3 };
+    slb.Add("bar", ints);
+    slb.FixedTypedVector("bar3", ints.data(), ints.size());  // Static size.
+    slb.Double("foo", 100);
+    slb.Map("mymap", [&]() {
+      slb.String("foo", "Fred");  // Testing key and string reuse.
+    });
+  });
+  slb.Finish();
+
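+  // Dump the raw encoded bytes so the result can be inspected by eye.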
+  for (size_t i = 0; i < slb.GetBuffer().size(); i++)
+    printf("%d ", slb.GetBuffer().data()[i]);
+  printf("\n");
+
+  auto map = flexbuffers::GetRoot(slb.GetBuffer()).AsMap();
+  TEST_EQ(map.size(), 5);
+  auto vec = map["vec"].AsVector();
+  TEST_EQ(vec.size(), 3);
+  TEST_EQ(vec[0].AsInt64(), -100);
+  TEST_EQ_STR(vec[1].AsString().c_str(), "Fred");
+  TEST_EQ(vec[1].AsInt64(), 0);  // Number parsing failed.
+  TEST_EQ(vec[2].AsDouble(), 4.0);
+  TEST_EQ(vec[2].AsString().IsTheEmptyString(), true);  // Wrong Type.
+  TEST_EQ_STR(vec[2].AsString().c_str(), "");  // This still works though.
+  TEST_EQ_STR(vec[2].ToString().c_str(), "4");  // Or have it converted.
+  auto tvec = map["bar"].AsTypedVector();
+  TEST_EQ(tvec.size(), 3);
+  TEST_EQ(tvec[2].AsInt8(), 3);
+  auto tvec3 = map["bar3"].AsFixedTypedVector();
+  TEST_EQ(tvec3.size(), 3);
+  TEST_EQ(tvec3[2].AsInt8(), 3);
+  TEST_EQ(map["foo"].AsUInt8(), 100);
+  TEST_EQ(map["unknown"].IsNull(), true);
+  auto mymap = map["mymap"].AsMap();
+  // These should be equal by pointer equality, since key and value are shared.
+  TEST_EQ(mymap.Keys()[0].AsKey(), map.Keys()[2].AsKey());
+  TEST_EQ(mymap.Values()[0].AsString().c_str(), vec[1].AsString().c_str());
+  // We can mutate values in the buffer.
+  TEST_EQ(vec[0].MutateInt(-99), true);
+  TEST_EQ(vec[0].AsInt64(), -99);
+  TEST_EQ(vec[1].MutateString("John"), true);  // Size must match.
+  TEST_EQ_STR(vec[1].AsString().c_str(), "John");
+  TEST_EQ(vec[1].MutateString("Alfred"), false);  // Too long.
+  TEST_EQ(vec[2].MutateFloat(2.0f), true);
+  TEST_EQ(vec[2].AsFloat(), 2.0f);
+  TEST_EQ(vec[2].MutateFloat(3.14159), false);  // Double does not fit in float.
+}
+
 int main(int /*argc*/, const char * /*argv*/[]) {
   // Run our various test suites:
 
@@ -1399,6 +1459,8 @@ int main(int /*argc*/, const char * /*argv*/[]) {
   ParseUnionTest();
   ConformTest();
 
+  FlexBuffersTest();
+
   if (!testing_fails) {
     TEST_OUTPUT_LINE("ALL TESTS PASSED");
     return 0;