diff --git a/spec/book.typ b/spec/book.typ
index 1bf944862..ff9502d43 100644
--- a/spec/book.typ
+++ b/spec/book.typ
@@ -5,6 +5,7 @@
 #book-meta(
   title: "Lambda VM specification",
   summary: [
+    #chapter("memory.typ")[Memory argument]
     #chapter("variables.typ")[Variables]
     #chapter("is_bit.typ")[IS_BIT template]
     #chapter("add.typ")[ADD template]
@@ -27,3 +28,17 @@
 ]
 #let rj = todo.with(background: teal, name: "Robin")
 #let et = todo.with(background: rgb("d4aa3a"), name: "Erik")
+
+#let style = state("style", (
+  foreground: white,
+))
+
+#let aside(title, body) = context figure(
+  block(inset: (left: 1em, right: 1em, bottom: 1em), stroke: style.final().foreground, breakable: false)[
+    #block(inset: (left: 1em, right: 1em, top: .75em, bottom: .75em),
+           width: 100% + 2em,
+           fill: rgb("55aaff"),
+           stroke: style.final().foreground,
+           align(center, strong(text(fill: black, title))))
+    #align(left, body)
+])
diff --git a/spec/ebook.typ b/spec/ebook.typ
index abddf2701..410e926bb 100644
--- a/spec/ebook.typ
+++ b/spec/ebook.typ
@@ -1,8 +1,12 @@
 #import "@preview/shiroa:0.3.1": *
+#import "/book.typ": style
 
 #import "/templates/ebook.typ"
 
 #show: ebook.project.with(title: "typst-book", spec: "book.typ")
+#style.update((
+  foreground: black,
+))
 
 // set a resolver for inclusion
 #ebook.resolve-inclusion(it => include it)
diff --git a/spec/memory.typ b/spec/memory.typ
new file mode 100644
index 000000000..6687d733d
--- /dev/null
+++ b/spec/memory.typ
@@ -0,0 +1,233 @@
+#import "/book.typ": book-page, rj, aside
+#import "/src.typ": load_config, load_chip
+#import "/chip.typ": (
+  render_chip_assumptions,
+  render_chip_column_table,
+  render_chip_padding_table,
+  render_constraint_table,
+  total_nr_instantiated_columns,
+  total_nr_variables,
+)
+
+#let config = load_config()
+#let chip = load_chip("src/page.toml", config)
+
+#show: book-page.with(title: "Memory argument")
+
+= Memory argument
+
+As part of fully proving the correct execution of a RISC-V program,
+the VM must ensure that memory reads and writes are consistent.
+That is, every byte read from some address corresponds to the byte that was last written to that address
+--- or the initial value if nothing has been written yet.
+We consider "memory" in a broad sense here:
+both RAM and the general purpose registers can be seen as instantiations of memory
+and are therefore handled simultaneously.
+#footnote[
+  While RAM is byte addressed, we do choose to store registers as a `DWordWL` over two word addresses.
+]
+
+On a high level, we ensure memory consistency by an interacting system of
+reads and writes to a lookup argument, combined with an initialization and finalization scheme.
+The initialization and finalization schemes together ensure both that (1) the necessary preconditions
+for the lookup system are satisfied, and (2) the program is executed with the correct
+initial memory and register contents as specified by the ELF binary and the ISA.
+
+== Memory types
+
+A commonly made distinction of memory types is that of _read-only_ and _read-write_ memory,
+with the more restrictive read-only variant often allowing for more efficient solutions
+(be that regarding prover time, verifier time or proof size) via table lookup proofs.
+Naturally, the VM’s main memory and registers should be handled by a read-write system
+as the guest program/environment can issue instructions that write to memory.
+While there are some subsystems that can be modelled as read-only memory
+---e.g., the program memory and instruction decoding---
+we opt to integrate these into the proof system via chip interactions (relying on techniques derived from table lookup arguments).
+As such, we only concern ourselves with read-write memory, moving forward.
+
+== Memory operations
+
+Every memory operation has some conceptual attributes that are relevant to mention or discuss:
+
+- The type of operation (read or write)
+- The memory address --- this is an address in the broad sense:
+  main memory and registers have their own dedicated part of the unified address space.
+- The value being read from or written to the memory address
+- When the value was read or written, see the below paragraph
+
+Since we will have to ensure that memory accesses are temporally consistent within the execution of the VM,
+we additionally consider a _timestamp_ for  every memory access, that should be strictly increasing.
+As such, it should never be possible for the system to generate accesses to the same address at identical timestamps.
+Multiple memory accesses can (and indeed will, consider e.g. register reads) occur in a single execution cycle of the VM,
+so we cannot use the cycle counter directly as timestamp for register accesses.
+We can, however, statically bound the maximal number of memory accesses made during a single execution by a granularity constant $k$
+and derive timestamps from the cycle counter.
+The $i$th possible memory access in cycle $c$ will obtain as timestamp the value $k dot c + i$.
+For simplicity, we will always reserve a timestamp for every possible memory access, and leave the timestamp unused if an instruction does not use it.
+
+
+#aside[Note on "simultaneous" memory accesses][
+  For reasons of completeness (since temporal integrity as discussed below is a security necessity),
+  we cannot deal with multiple accesses to the same address at identical timestamps.
+  However, if multiple accesses are guaranteed to be independent (that is, to different addresses), they can still share a timestamp
+  --- consider, e.g., the case of reading a word as 4 bytes with the `LW` load instruction.
+  This property is already taken into account where possible in the design of the system.
+  For instance, in the CPU chip, we can ensure that there are at most 3 memory accesses not guaranteed
+  to be independent, so a timestamp granularity of 4 timestamps per cycle is enough.
+]
+
+
+== Permutation argument
+
+We can conceptually organise the state of the memory as a collection of "tokens" that represent tuples
+$(serif("timestamp"), serif("address"), serif("value"))$,
+meaning the current value written to $serif("address")$ is $serif("value")$,
+last written to memory at $serif("timestamp")$.
+Having exactly one value associated with any address will be ensured (see further down in this document)
+by the interaction of memory initialization, memory finalization, and the effects of memory operations.
+
+Each memory operation will then do two things:
+
+- Consume the current token in the memory
+- Emit a new token to replace it
+
+Naturally, for a read operation, the _values_ embedded in the consumed and emitted tokens must be identical.
+From the need to consume a token even on the first memory access,
+we can see the necessity for a memory initialization procedure
+---in addition to having to make sure the initial memory content lines up with what the binary dictates.
+
+So long as we can properly constrain temporal integrity (that is, no memory operation can consume future tokens),
+this "balancing" act of tokens can be integrated (with sufficient domain separation) into the existing LogUp argument:
+consuming a token corresponds to a "receive" and emitting a new token is a "send".
+#rj[properly link/refer to the logup spec]
+
+== Temporal integrity
+
+To ensure temporal integrity, every memory operation needs to be constrained for the newly emitted token
+to have a strictly greater timestamp than the consumed token.
+This raises the question of how to represent timestamps and cleanly perform this check,
+as over a finite field the “less than” relation is ill-defined
+(though it is common and natural to consider it as the less than relation over the natural lift of the field into the integers).
+We choose to represent timestamps as machine words, using the existing `LT` chip functionality for comparisons.
+#rj[Properly link/refer to the LT chip]
+
+#aside[Note on options and trade-offs for timestamp representation][
+ #grid(columns: (1fr, 1fr), gutter: 1em)[#align(center, emph[Machine word])][#align(center, emph[Field element])][
+    - Clean definition of “less-than”, using the already existing `LT` functionality in the ALU
+    - Harder to perform increments, needing extra constraints beyond field arithmetic
+      - But this can be alleviated by providing a precomputed column that has a fixed increment per CPU row
+  ][
+    - Comparison is more annoying, but can work by:
+      - Decomposition into a machine word and chip interaction with the LT chip
+      - Bit decomposition and comparison constraints
+      - Range-checking the difference to be sufficiently small w.r.t. the field characteristic.
+    - Increments and basic arithmetic operations are cheap
+  ]
+]
+
+#rj[reference to CPU chip/timestamp column and MEMW chip]
+
+== Initialization and Finalization
+
+Because the LogUp argument handling token consumption and emission needs to be fully balanced
+--- every token emitted should be consumed, and vice versa ---
+we need to have a system to emit the initial tokens and consume the final tokens.
+This needs to ensure that every address has at most a single initializing emission, and at most one finalizing consumption.
+Having at most one initialization will, through the correctness of the lookup argument,
+immediately lead to having at most one correct finalization, and vice versa.
+
+The initialization will need to correspond to a fixed initial register state for the VM,
+as well as the memory loaded from the program binary, zero-initialization of memory elsewhere, and private input provided by the prover.
+The contribution of initialization with static data from the ELF executable and the initial register state to the sum
+can be handled directly by the verifier, ensuring correctness corresponding to the ELF binary being proven.
+This leaves only zero-initialization and prover input as prover-side concerns for initialization,
+alongside the finalization of the entire used memory.
+
+For our chosen scheme (which we refer to as "paged initialization/finalization"),
+the available memory range is split into equally (power-of-two) sized "pages".
+Each address can then be represented as `address = page_base_address + page_offset`,
+with `page_base_address` being "page-aligned", and `page_offset` belonging to a limited range (the page size).
+As such, initialization or finalization of a page is represented by a table with columns `page`, `offset`, `value`, and ---for finalization--- `timestamp`.
+The `page` column is a preprocessed, constant value (which can be entirely virtualized/inlined into the constraints for this table),
+and the `offset` column is a preprocessed column containing its row index.
+Depending on the type of initialization, `value` can be a prover-committed column (input data), or a precomputed, constant column containing `0` (free memory space).
+This table then feeds into the LogUp system in the normal way,
+emitting the initial tokens for all addresses in a page, without consuming any tokens.
+Since the `offset` column is always the same, it can be reused across all paged initialization and finalization tables.
+
+Concretely, each page gets an associated `PAGE` table, consisting of #total_nr_variables(chip) variables
+over #total_nr_instantiated_columns(chip, config) columns.
+For each such table, the `page` variable is instantiated as the constant base address of the page.
+The `offset` column is preprocessed, which helps the verifier ensure that each page has a single fixed size,
+but the verifier should still check that no pages overlap and all `page` values are page-aligned.
+
+=== Page initialization
+
+#rj[check whether we need `fini` to be range-checked]
+We present here a set of constraints on the `PAGE` table that
+
++ enforces the initial and final values of each address are bytes
++ adds the initial and final interaction to the LogUp argument
+
+For zero-initialized pages, `init` can be a constant `0`,
+and hence doesn't need a column, nor a range check.
+
+#render_chip_column_table(chip, config)
+#render_constraint_table(chip, config)
+
+
+#aside[Note on alternatives and trade-offs][
+  We identify a few alternatives that would achieve the desired initialization/finalization functionalities, and consider their respective trade-offs.
+
+  _"Free-zero" initialization_
+
+  Zero-initialization could be achieved by allowing the `MEMW` chip to output a zero
+  without consuming a token from the lookup argument.
+  This would in turn be made secure by finalization consuming at most one token per address:
+  if an address is initialized more than once, the proof cannot be finalized.
+  - This requires fewer pages (and hence tables) for zero-initialization.
+  - But it comes at a cost of added complexity in the `MEMW `chip, and likely some extra columns to handle this.
+    Keeping track of initialized addresses, and potentially having to initialize only some of the bytes in a word-read
+    may make bookkeeping challenging.
+  - This is an alternative form of sparse initialization (see below), so it is incompatible with paged finalization.
+    Paged finalization can be made into a compatible sparse form by adding a bit-checked multiplicity column.
+
+  _Sparse initialization/finalization_
+
+  One or more STARK tables (depending on the amount of memory used) consisting of `(address, value)` columns are introduced,
+  where for zero-initialization, `value` can be constant zero.
+  Transition constraints ensure that `address` is strictly increasing, enforcing the "at most once" property;
+  `value` is range-checked to consist of bytes.
+  Similar to paged finalization, an additional `timestamp` column is added, containing the final timestamp each address was accessed.
+  This table is then further used to contribute to the LogUp sum as with any other interactions.
+  - The transition constraints can be chosen to only apply on finalization, as at-most-once finalization is enough to ensure consistency.
+  - Sparse initialization is incompatible with paged finalization, see also the remark under free-zero initialization above.
+  - This would require transition constraints, which currently are not needed elsewhere in the VM design
+    - Additionally, for memory use exceeding the capacity of a single initialization/finalization table, some form of transition constraint between tables is needed
+    - Alternatively, transition constraints could potentially be avoided by more integration into the LogUp system, but this could turn out more costly in practice
+  - This is compatible with the above "free zero" initialization
+  - Since a prover-committed address column is needed (rather than a precomputed one), the number of required columns increases.
+    - As an optimization, the address column could potentially be used simultaneously for initialization and finalization
+  - Sparse initialization/finalization reduces the cost for sparse memory access patterns,
+    where only a few addresses would be accessed per page.
+      Most programs and compilers should however favor a memory locality that makes paged initialization/finalization comparable.
+]
+
+=== Register initialization/finalization
+
+#rj[Properly link/reference ECALL/HALT chip]
+The initial and final state of registers can be entirely known by
+the verifier, since the relevant initialization values are either zero,
+or embedded in the ELF, and the final values can be set to a known value
+by the HALT ecall.
+As additionally, the number of registers is small, the verifier can directly
+add the required balancing terms to the LogUp sum.
+
+== Notes and considerations
+
+- Register reads and writes may interact within a single cycle, so a correct and fixed ordering needs to be ensured
+- Correctness of initialization and completeness of finalization need to be ensured
+
+== Future topics of interest
+
+- Optimize memory systems after determining factual bottlenecks (e.g. taking inspiration from Twist and Shout, or other recent research)
diff --git a/spec/src/config.toml b/spec/src/config.toml
index 68f1683de..d836f80e5 100644
--- a/spec/src/config.toml
+++ b/spec/src/config.toml
@@ -127,6 +127,12 @@ subtypes = ["DWordWL"]
 desc = "A preprocessed column holding timestamps as `DWordWL`. Row `i` of the column contains the value $2^2 dot (i + 1)$. Used in the CPU chip, see there for more details about the magic number."
 preprocessed = true
 
+[[variables.types]]
+label = "RowIndex"
+subtypes = ["Word"]
+desc = "A preprocessed column holding the row index (zero-indexed)."
+preprocessed = true
+
 [variables.categories]
 all = ["input", "output", "auxiliary", "virtual", "multiplicity", "condition"]
 instantiated = ["input", "output", "auxiliary", "multiplicity"]
diff --git a/spec/src/page.toml b/spec/src/page.toml
new file mode 100644
index 000000000..8053d63df
--- /dev/null
+++ b/spec/src/page.toml
@@ -0,0 +1,57 @@
+name = "PAGE"
+
+# Input
+
+[[variables.input]]
+name = "offset"
+type = "RowIndex"
+desc = "The offset from the page base address."
+
+[[variables.input]]
+name = "init"
+type = "Byte"
+desc = "The initial value of this address. Can be replaced by a constant zero for zero-initialization"
+
+[[variables.input]]
+name = "fini"
+type = "Byte"
+desc = "The final value this address took"
+
+[[variables.input]]
+name = "timestamp"
+type = "DWordWL"
+desc = "The timestamp at which this address was last accessed"
+
+# Virtual
+
+[[variables.virtual]]
+name = "address"
+type = "DWordWL"
+desc = "Adding `offset` to the page base address `page`. `page` is a constant with respect to a single instance of this table."
+def = ["+", "page", ["cast", "offset", "DWordWL"]]
+
+
+[[constraint_groups]]
+name = "all"
+
+[[constraints.all]]
+kind = "interaction"
+tag = "IS_BYTE"
+input = ["init"]
+
+[[constraints.all]]
+kind = "interaction"
+tag = "IS_BYTE"
+input = ["fini"]
+
+[[constraints.all]]
+kind = "interaction"
+tag = "memory"
+input = [0, "address", 0, "init"]
+multiplicity = -1
+
+[[constraints.all]]
+kind = "interaction"
+tag = "memory"
+input = [0, "address", "timestamp", "fini"]
+multiplicity = 1