lightning · niftynei · Jul 22, 2019 · Jul 12, 2019 · Jul 12, 2019
diff --git a/.aspell.en.pws b/.aspell.en.pws
@@ -341,3 +341,5 @@ optimizations
 structs
 CompactSize
 encodings
+bigsize
+BigSize
diff --git a/01-messaging.md b/01-messaging.md
@@ -20,6 +20,7 @@ All data fields are unsigned big-endian unless otherwise specified.
     * [The `error` Message](#the-error-message)
   * [Control Messages](#control-messages)
     * [The `ping` and `pong` Messages](#the-ping-and-pong-messages)
+  * [Appendix B: BigSize Test Vectors](#appendix-b-bigsize-test-vectors)
   * [Acknowledgments](#acknowledgments)
   * [References](#references)
   * [Authors](#authors)
@@ -95,16 +96,21 @@ A `tlv_record` represents a single field, encoded in the form:
 * [`varint`: `length`]
 * [`length`: `value`]
 
+A `varint` is a variable-length, unsigned integer encoding using the
+[BigSize](#appendix-b-bigsize-test-vectors) format, which resembles the bitcoin
+CompactSize encoding but uses big-endian for multi-byte values rather than
+little-endian. 
+
 A `tlv_stream` is a series of (possibly zero) `tlv_record`s, represented as the
 concatenation of the encoded `tlv_record`s. When used to extend existing
 messages, a `tlv_stream` is typically placed after all currently defined fields.
 
-The `type` is a varint encoded using the bitcoin CompactSize format. It
-functions as a message-specific, 64-bit identifier for the `tlv_record`
-determining how the contents of `value` should be decoded.
+The `type` is a varint encoded using the BigSize format. It functions as a
+message-specific, 64-bit identifier for the `tlv_record` determining how the
+contents of `value` should be decoded.
 
-The `length` is a varint encoded using the bitcoin CompactSize format
-signaling the size of `value` in bytes.
+The `length` is a varint encoded using the BigSize format signaling the size of
+`value` in bytes.
 
 The `value` depends entirely on the `type`, and should be encoded or decoded
 according to the message-specific format determined by `type`.
@@ -385,6 +391,229 @@ every message maximally).
 Finally, the usage of periodic `ping` messages serves to promote frequent key
 rotations as specified within [BOLT #8](08-transport.md).
 
+## Appendix B: BigSize Test Vectors
+
+The following test vectors can be used to assert the correctness of a BigSize
+implementation used in the TLV format. The format is identical to the
+CompactSize encoding used in bitcoin, but replaces the little-endian encoding of
+multi-byte values with big-endian.
+
+Values encoded with BigSize will produce an encoding of either 1, 3, 5, or 9
+bytes depending on the size of the integer. The encoding is a piece-wise
+function that takes a `uint64` value `x` and produces:
+```
+        uint8(x)                if x < 0xfd
+        0xfd + be16(uint16(x))  if x < 0x10000
+        0xfe + be32(uint32(x))  if x < 0x100000000
+        0xff + be64(x)          otherwise.
+```
+
+Here `+` denotes concatenation and `be16`, `be32`, and `be64` produce a
+big-endian encoding of the input for 16, 32, and 64-bit integers, respectively.
+
+A value is said to be _minimally encoded_ if it could not have been encoded
+using a smaller representation. For example, a BigSize encoding that occupies 5
+bytes but whose value is less than 0x10000 is not minimally encoded. All values
+decoded with BigSize should be checked to ensure they are minimally encoded.
+
+### BigSize Decoding Tests
+
+The following is an example of how to execute the BigSize decoding tests.
+```golang
+// testReadVarInt decodes test.Bytes with tlv.ReadVarInt and checks the outcome
+// against the test vector: the resulting error string must equal test.ExpErr
+// (an empty ExpErr means decoding must succeed), and on success the decoded
+// value must equal test.Value.
+func testReadVarInt(t *testing.T, test varIntTest) {
+        var buf [8]byte
+        r := bytes.NewReader(test.Bytes)
+        val, err := tlv.ReadVarInt(r, &buf)
+
+        // Compare the error strings in both directions, so that a decoder
+        // which wrongly accepts an invalid encoding (nil error where one was
+        // expected) fails the test instead of passing silently.
+        var gotErr string
+        if err != nil {
+                gotErr = err.Error()
+        }
+        if gotErr != test.ExpErr {
+                t.Fatalf("expected decoding error: %v, got: %v",
+                        test.ExpErr, err)
+        }
+
+        // If we expected a decoding error, there's no point checking the value.
+        if test.ExpErr != "" {
+                return
+        }
+
+        if val != test.Value {
+                t.Fatalf("expected value: %d, got %d", test.Value, val)
+        }
+}
+```
+
+A correct implementation should pass against these test vectors:
+```json
+[
+    {
+        "name": "zero",
+        "value": 0,
+        "bytes": "00"
+    },
+    {
+        "name": "one byte high",
+        "value": 252,
+        "bytes": "fc"
+    },
+    {
+        "name": "two byte low",
+        "value": 253,
+        "bytes": "fd00fd"
+    },
+    {
+        "name": "two byte high",
+        "value": 65535,
+        "bytes": "fdffff"
+    },
+    {
+        "name": "four byte low",
+        "value": 65536,
+        "bytes": "fe00010000"
+    },
+    {
+        "name": "four byte high",
+        "value": 4294967295,
+        "bytes": "feffffffff"
+    },
+    {
+        "name": "eight byte low",
+        "value": 4294967296,
+        "bytes": "ff0000000100000000"
+    },
+    {
+        "name": "eight byte high",
+        "value": 18446744073709551615,
+        "bytes": "ffffffffffffffffff"
+    },
+    {
+        "name": "two byte not canonical",
+        "value": 0,
+        "bytes": "fd00fc",
+        "exp_error": "decoded varint is not canonical"
+    },
+    {
+        "name": "four byte not canonical",
+        "value": 0,
+        "bytes": "fe0000ffff",
+        "exp_error": "decoded varint is not canonical"
+    },
+    {
+        "name": "eight byte not canonical",
+        "value": 0,
+        "bytes": "ff00000000ffffffff",
+        "exp_error": "decoded varint is not canonical"
+    },
+    {
+        "name": "two byte short read",
+        "value": 0,
+        "bytes": "fd00",
+        "exp_error": "unexpected EOF"
+    },
+    {
+        "name": "four byte short read",
+        "value": 0,
+        "bytes": "feffff",
+        "exp_error": "unexpected EOF"
+    },
+    {
+        "name": "eight byte short read",
+        "value": 0,
+        "bytes": "ffffffffff",
+        "exp_error": "unexpected EOF"
+    },
+    {
+        "name": "one byte no read",
+        "value": 0,
+        "bytes": "",
+        "exp_error": "EOF"
+    },
+    {
+        "name": "two byte no read",
+        "value": 0,
+        "bytes": "fd",
+        "exp_error": "unexpected EOF"
+    },
+    {
+        "name": "four byte no read",
+        "value": 0,
+        "bytes": "fe",
+        "exp_error": "unexpected EOF"
+    },
+    {
+        "name": "eight byte no read",
+        "value": 0,
+        "bytes": "ff",
+        "exp_error": "unexpected EOF"
+    }
+]
+```
+
+### BigSize Encoding Tests
+
+The following is an example of how to execute the BigSize encoding tests.
+```golang
+// testWriteVarInt encodes test.Value with tlv.WriteVarInt into an in-memory
+// buffer and fails the test if encoding returns an error or if the bytes
+// written differ from test.Bytes.
+func testWriteVarInt(t *testing.T, test varIntTest) {
+        var (
+                w   bytes.Buffer
+                buf [8]byte
+        )
+        err := tlv.WriteVarInt(&w, test.Value, &buf)
+        if err != nil {
+                t.Fatalf("unable to encode %d as varint: %v",
+                        test.Value, err)
+        }
+
+        // bytes.Equal is the idiomatic equality check; bytes.Compare is
+        // intended for ordering.
+        if !bytes.Equal(w.Bytes(), test.Bytes) {
+                t.Fatalf("expected bytes: %v, got %v",
+                        test.Bytes, w.Bytes())
+        }
+}
+```
+
+A correct implementation should pass against the following test vectors:
+```json
+[
+    {
+        "name": "zero",
+        "value": 0,
+        "bytes": "00"
+    },
+    {
+        "name": "one byte high",
+        "value": 252,
+        "bytes": "fc"
+    },
+    {
+        "name": "two byte low",
+        "value": 253,
+        "bytes": "fd00fd"
+    },
+    {
+        "name": "two byte high",
+        "value": 65535,
+        "bytes": "fdffff"
+    },
+    {
+        "name": "four byte low",
+        "value": 65536,
+        "bytes": "fe00010000"
+    },
+    {
+        "name": "four byte high",
+        "value": 4294967295,
+        "bytes": "feffffffff"
+    },
+    {
+        "name": "eight byte low",
+        "value": 4294967296,
+        "bytes": "ff0000000100000000"
+    },
+    {
+        "name": "eight byte high",
+        "value": 18446744073709551615,
+        "bytes": "ffffffffffffffffff"
+    }
+]
+```
+
 ## Acknowledgments
 
 [ TODO: (roasbeef); fin ]

diff --git a/tools/spellcheck.sh b/tools/spellcheck.sh
@@ -51,6 +51,7 @@ do
     if [ -n "$CHECK" ]; then
 	# Eliminate the following:
 	# Inline references eg. [Use of segwit](#use-of-segwit)
+	# Code blocks using ```
 	# quoted identifiers eg. `htlc_id`
 	# field descriptions, eg. `* [`num_htlcs*64`:`htlc_signature]'
 	# indented field names, eg. '    `num_htlcs`: 0'
@@ -60,6 +61,7 @@ do
 	# long hex strings
 	# long base58 strings
 	WORDS=$(sed -e 's/\]([-#a-zA-Z0-9_.]*)//g' \
+	    -e '/^```/,/^```/d' \
 	    -e 's/`[a-zA-Z0-9_]*`//g' \
 	    -e 's/\* \[`[_a-z0-9*]\+`://g' \
 	    -e 's/0x[a-fA-F0-9]\+//g' \
-Original file line number
+Diff line change
@@ Expand Up / @@ -341,3 +341,5 @@ optimizations @@
     structs
     CompactSize
     encodings
+    bigsize
+    BigSize