From 7b38e357eadde34ee789aa578ab4782420b60767 Mon Sep 17 00:00:00 2001 From: M-Fatah Date: Thu, 23 Apr 2026 01:04:26 +0200 Subject: [PATCH] Add Math to Core. --- .gitignore | 6 +- CMakeLists.txt | 26 +- README.md | 130 ++- core/CMakeLists.txt | 28 + core/base64.cpp | 36 +- core/base64.h | 10 +- core/containers/array.h | 54 +- core/containers/hash_set.h | 4 +- core/containers/hash_table.h | 58 +- core/containers/ring_buffer.h | 212 +++++ core/containers/span.h | 216 +++++ core/containers/stack_array.h | 24 +- core/containers/string.h | 156 ++-- core/containers/string_interner.cpp | 2 +- core/containers/string_interner.h | 2 +- core/defines.h | 31 +- core/ecs.cpp | 2 +- core/ecs.h | 24 +- core/formatter.h | 212 +++-- core/hash.h | 90 +-- core/json.cpp | 28 +- core/json.h | 8 +- core/math/f32.h | 273 +++++++ core/math/f32x2.h | 160 ++++ core/math/f32x2x2.h | 105 +++ core/math/f32x3.h | 172 ++++ core/math/f32x3x3.h | 172 ++++ core/math/f32x4.h | 245 ++++++ core/math/f32x4x4.h | 459 +++++++++++ core/math/f64.h | 259 ++++++ core/math/f64x2.h | 197 +++++ core/math/f64x2x2.h | 105 +++ core/math/f64x3.h | 106 +++ core/math/f64x3x3.h | 151 ++++ core/math/f64x4.h | 216 +++++ core/math/f64x4x4.h | 207 +++++ core/math/i32.h | 41 + core/math/i32x2.h | 33 + core/math/i32x3.h | 33 + core/math/i32x4.h | 187 +++++ core/math/i64.h | 41 + core/math/quaternion.h | 409 ++++++++++ core/math/random.h | 207 +++++ core/math/u32.h | 27 + core/math/u32x2.h | 31 + core/math/u32x3.h | 31 + core/math/u32x4.h | 151 ++++ core/math/u64.h | 27 + core/memory/arena_allocator.cpp | 62 +- core/memory/arena_allocator.h | 14 +- core/memory/heap_allocator.cpp | 128 ++- core/memory/heap_allocator.h | 4 +- core/memory/memory.h | 48 +- core/memory/pool_allocator.cpp | 18 +- core/memory/pool_allocator.h | 6 +- core/platform/platform.h | 149 ++-- core/platform/platform_linux.cpp | 63 ++ core/platform/platform_macos.mm | 114 +-- core/platform/platform_win32.cpp | 281 ++++--- core/reflect.h | 146 ++-- 
core/serialization/binary_serializer.h | 48 +- core/serialization/json_serializer.h | 52 +- core/source_location.h | 4 +- core/tester.cpp | 16 +- core/tester.h | 22 +- core/utils.h | 6 +- docs/containers.md | 369 +++++++++ docs/defer.md | 42 + docs/ecs.md | 81 ++ docs/formatter.md | 107 +++ docs/hash.md | 48 ++ docs/home.md | 57 ++ docs/math.md | 224 +++++ docs/memory.md | 116 +++ docs/platform.md | 67 ++ docs/print-log.md | 59 ++ docs/reflect.md | 87 ++ docs/result.md | 65 ++ docs/serialization.md | 109 +++ docs/validate.md | 32 + test/src/test.cpp | 94 +-- unittest/CMakeLists.txt | 13 +- unittest/src/bench_math.cpp | 535 ++++++++++++ unittest/src/unittest.cpp | 4 +- unittest/src/unittest_containers.cpp | 502 +++++++++--- unittest/src/unittest_core.cpp | 12 +- unittest/src/unittest_formatter.cpp | 32 +- unittest/src/unittest_math.cpp | 1031 ++++++++++++++++++++++++ unittest/src/unittest_platform.cpp | 26 +- unittest/src/unittest_reflect.cpp | 258 +++--- unittest/src/unittest_serializer.cpp | 174 ++-- 91 files changed, 9515 insertions(+), 1214 deletions(-) create mode 100644 core/containers/ring_buffer.h create mode 100644 core/containers/span.h create mode 100644 core/math/f32.h create mode 100644 core/math/f32x2.h create mode 100644 core/math/f32x2x2.h create mode 100644 core/math/f32x3.h create mode 100644 core/math/f32x3x3.h create mode 100644 core/math/f32x4.h create mode 100644 core/math/f32x4x4.h create mode 100644 core/math/f64.h create mode 100644 core/math/f64x2.h create mode 100644 core/math/f64x2x2.h create mode 100644 core/math/f64x3.h create mode 100644 core/math/f64x3x3.h create mode 100644 core/math/f64x4.h create mode 100644 core/math/f64x4x4.h create mode 100644 core/math/i32.h create mode 100644 core/math/i32x2.h create mode 100644 core/math/i32x3.h create mode 100644 core/math/i32x4.h create mode 100644 core/math/i64.h create mode 100644 core/math/quaternion.h create mode 100644 core/math/random.h create mode 100644 core/math/u32.h create mode 
100644 core/math/u32x2.h create mode 100644 core/math/u32x3.h create mode 100644 core/math/u32x4.h create mode 100644 core/math/u64.h create mode 100644 docs/containers.md create mode 100644 docs/defer.md create mode 100644 docs/ecs.md create mode 100644 docs/formatter.md create mode 100644 docs/hash.md create mode 100644 docs/home.md create mode 100644 docs/math.md create mode 100644 docs/memory.md create mode 100644 docs/platform.md create mode 100644 docs/print-log.md create mode 100644 docs/reflect.md create mode 100644 docs/result.md create mode 100644 docs/serialization.md create mode 100644 docs/validate.md create mode 100644 unittest/src/bench_math.cpp create mode 100644 unittest/src/unittest_math.cpp diff --git a/.gitignore b/.gitignore index 3598210d..2dfe63ca 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,8 @@ packages .idea/ .cache/ -cmake-build-debug-visual-studio/ \ No newline at end of file +cmake-build-debug-visual-studio/ + +# macOS AppleDouble metadata sidecar files (created on non-HFS volumes) +._* +.DS_Store \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index d21b2a5d..4a26d19a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,12 +35,34 @@ target_compile_definitions(core-options INTERFACE $<$:PLATFORM_MACOS=1> ) target_compile_options(core-options INTERFACE - $<$,$>: -Werror -Wall -Wno-c++2b-extensions -Wno-enum-constexpr-conversion -Wno-c++11-narrowing -Wno-deprecated-declarations -Wno-invalid-offsetof> + $<$,$>: -Werror -Wall -Wno-c++2b-extensions -Wno-c++11-narrowing -Wno-deprecated-declarations -Wno-invalid-offsetof -Wno-missing-braces> $<$: -Werror -Wall -Wextra -std=c++2b -Wno-invalid-offsetof -Wno-missing-field-initializers> - $<$: -WX -W4 -Zc:preprocessor> + $<$: -WX -W4 -Zc:preprocessor -wd4201> ) target_include_directories(core-options INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/) +# SIMD arch gates — compile-time selection per target architecture. 
+# ARM64 (Apple Silicon, Linux/Windows ARM): NEON, always available on the ISA. +# x86_64 (Windows/Linux): AVX, Sandy Bridge (2011+) required. Anything beyond AVX +# (AVX2, AVX-512) is not guaranteed and stays out of scope. +# +# `CORE_SIMD_FORCE_SCALAR` overrides arch detection for parity testing — the +# arch flags are not emitted when it's on, so every math op falls through to its +# scalar branch. +option(CORE_SIMD_FORCE_SCALAR "Force scalar SIMD fallback for parity testing." OFF) +if(CORE_SIMD_FORCE_SCALAR) + target_compile_definitions(core-options INTERFACE SIMD_FORCE_SCALAR=1) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm64|aarch64)$") + target_compile_definitions(core-options INTERFACE SIMD_NEON=1) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64|x64)$") + target_compile_definitions(core-options INTERFACE SIMD_AVX=1) + if(MSVC) + target_compile_options(core-options INTERFACE /arch:AVX) + else() + target_compile_options(core-options INTERFACE -mavx) + endif() +endif() + add_subdirectory(core) add_subdirectory(test) diff --git a/README.md b/README.md index 71adf318..c63c0f92 100644 --- a/README.md +++ b/README.md @@ -3,56 +3,128 @@ [![Build status](https://github.com/M-Fatah/core/workflows/CI/badge.svg)](https://github.com/M-Fatah/core/actions?workflow=CI) ![Lines of code](https://img.shields.io/tokei/lines/github/M-Fatah/core) + --- -## **Introduction:** -Core is a C-like C++ collection of utilities used as a foundation when writing programs in a data oriented fashion. +## **Introduction** -It started as a learning process that eventually evolved to be a useful container library on top of C++. +**Core** is a C-like C++20 library providing foundational utilities for data-oriented programming. It is designed as a replacement for the STL with a simpler, more explicit design: -Its written in C++20. +- **Explicit allocations** — every container accepts an `Allocator *`, making every allocation visible and swappable (heap, arena, pool, temp). 
+- **No exceptions** — errors are returned as `Error` values. +- **Explicit lifetimes** — `_init` / `_deinit` pairs make every allocation and ownership transfer visible at the call site. +- **C-like style** — free functions over methods, plain structs over class hierarchies. -It is still a WIP, lots of breaking changes are expected to happen. +> This library is still a WIP — breaking changes are expected. -## **Code style:** -```C++ -#include -#include +--- -struct Vector3 -{ - f32 x, y, z; -}; +## **Modules** + +| Module | Description | +|---|---| +| `core/defines.h` | Primitive aliases (`I8`..`I64`, `U8`..`U64`, `F32`, `F64`) and macro utilities | +| `core/memory/` | Allocator interface, heap, arena, pool, and temp allocators (with alignment) | +| `core/containers/array.h` | Dynamic array (`Array`) | +| `core/containers/stack_array.h` | Fixed-capacity stack array (`Stack_Array`) | +| `core/containers/span.h` | Non-owning view (`Span`) | +| `core/containers/string.h` | Heap-allocated string (`String`) | +| `core/containers/hash_table.h` | Open-addressing hash table (`Hash_Table`) | +| `core/containers/hash_set.h` | Hash set (`Hash_Set`) | +| `core/containers/string_interner.h` | String deduplication (`String_Interner`) | +| `core/math/` | Vectors, matrices, quaternion, scalar helpers, random — NEON / AVX / scalar | +| `core/formatter.h` | Type-safe string formatting (`format()`) — math types format natively | +| `core/print.h` / `core/log.h` | Colored output and log levels | +| `core/defer.h` | Scope-exit macro (`DEFER`) | +| `core/tester.h` | Minimal unit-test framework (`TESTER_TEST`, `TESTER_CHECK`) | +| `core/validate.h` | Runtime assertions with source location | +| `core/result.h` | Error-returning pattern (`Error`, `Result`) | +| `core/hash.h` | FNV-32 and type-generic `hash()` | +| `core/reflect.h` | Compile-time type reflection | +| `core/serialization/` | Binary and JSON serializers | +| `core/ecs.h` | Minimal entity-component system | +| 
`core/platform/platform.h` | File I/O, path utilities, native dialogs | + +📖 **Full documentation is in the [`docs/`](docs/home.md) folder.** + +--- + +## **Quick Example** + +```cpp +#include +#include +#include +#include +#include struct Vertex { - Vector3 position; - Vector3 normal; + F32x3 position; + F32x3 normal; }; -Array vertices = array_init(memory::heap_allocator()); +auto vertices = array_init(); DEFER(array_deinit(vertices)); -array_push(vertices, Vertex{{1.0f, 2.0f, 3.0f}, {0.0f, 0.0f, 1.0f}}); +array_push(vertices, Vertex{F32x3{1.0f, 2.0f, 3.0f}, F32X3_UP}); +array_push(vertices, Vertex{F32x3{4.0f, 5.0f, 6.0f}, F32X3_FORWARD}); + +F32x4x4 view = f32x4x4_look_at(F32x3{0, 0, 5}, F32X3_ZERO, F32X3_UP); +log_info("loaded {} vertices, view matrix:\n{}", vertices.count, view); ``` -## **Platforms:** -- Windows. -- Linux. -- Mac. +--- + +## **Platforms** -## **Prerequisites:** -#### **Windows:** -- Download and install [CMake](https://cmake.org/download/) (version 3.20 atleast). -#### **Linux:** -``` +| Platform | Status | +|---|---| +| Windows | ✅ | +| Linux | ✅ | +| macOS | ✅ | + +--- + +## **Prerequisites** + +#### **Windows** +- [CMake](https://cmake.org/download/) 3.20+ + +#### **Linux** +```bash sudo apt update sudo apt-get install -y cmake libx11-dev libxkbcommon-x11-dev libx11-xcb-dev zenity ``` -## **Building:** -``` +#### **macOS** +- Xcode Command Line Tools: `xcode-select --install` +- [CMake](https://cmake.org/download/) 3.20+ + +--- + +## **Building** + +```bash cmake -B build cmake --build build --config Debug -j ``` -Output is in `build/bin/${CONFIG}/` directory. \ No newline at end of file + +Output is placed in `build/bin/Debug/`. 
+ +### Options + +| CMake Option | Default | Description | +|---|---|---| +| `CORE_BUILD_UNITTEST` | ON (main project) | Build unit tests | +| `CORE_INSTALL` | ON (main project) | Enable install target | +| `CORE_BUILD_UNITY` | OFF | Enable unity (single-TU) build | +| `CORE_BUILD_STATIC` | OFF | Build as a static library | + +### Running Tests + +```bash +cmake -B build +cmake --build build --config Debug +./build/bin/Debug/unittest +``` \ No newline at end of file diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 149a7761..6e044eb8 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -16,9 +16,37 @@ set(HEADER_FILES containers/array.h containers/hash_set.h containers/hash_table.h + containers/ring_buffer.h + containers/span.h containers/stack_array.h containers/string_interner.h containers/string.h + math/f32.h + math/f64.h + math/i32.h + math/i64.h + math/u32.h + math/u64.h + math/f32x2.h + math/f32x3.h + math/f32x4.h + math/f64x2.h + math/f64x3.h + math/f64x4.h + math/i32x2.h + math/i32x3.h + math/i32x4.h + math/u32x2.h + math/u32x3.h + math/u32x4.h + math/f32x2x2.h + math/f32x3x3.h + math/f32x4x4.h + math/f64x2x2.h + math/f64x3x3.h + math/f64x4x4.h + math/quaternion.h + math/random.h memory/memory.h memory/heap_allocator.h memory/arena_allocator.h diff --git a/core/base64.cpp b/core/base64.cpp index 1dd6ef63..e921d172 100644 --- a/core/base64.cpp +++ b/core/base64.cpp @@ -3,25 +3,25 @@ inline static constexpr const char *BASE64_CHARACTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; String -base64_encode(const u8 *data, u64 size, memory::Allocator *allocator) +base64_encode(const U8 *data, U64 size, memory::Allocator *allocator) { String out = string_init(allocator); - for (u64 i = 0; i < size; i += 3) + for (U64 i = 0; i < size; i += 3) { - i32 value = (data[i] << 16) + (data[i + 1] << 8) + (data[i + 2]); + I32 value = (data[i] << 16) + (data[i + 1] << 8) + (data[i + 2]); - for (u64 j = 0; j < 4; ++j) + for (U64 j 
= 0; j < 4; ++j) { - i32 x = (value >> ((3 - j) * 6)) & 0x3F; + I32 x = (value >> ((3 - j) * 6)) & 0x3F; string_append(out, BASE64_CHARACTERS[x]); } } - i32 padding = size % 3; + I32 padding = size % 3; if (padding > 0) { - for (i32 i = 0; i < (3 - padding); ++i) + for (I32 i = 0; i < (3 - padding); ++i) { out[out.count - i - 1] = '='; } @@ -33,8 +33,8 @@ base64_encode(const u8 *data, u64 size, memory::Allocator *allocator) String base64_decode(const String &data, memory::Allocator *allocator) { - constexpr auto index = [](const String &data, char c) -> u8 { - for (u8 i = 0; i < data.count; ++i) + constexpr auto index = [](const String &data, char c) -> U8 { + for (U8 i = 0; i < data.count; ++i) { if (data[i] == c) { @@ -47,26 +47,26 @@ base64_decode(const String &data, memory::Allocator *allocator) String out = string_init(allocator); - for (u64 i = 0; i < data.count; i += 4) + for (U64 i = 0; i < data.count; i += 4) { - i32 a = index(string_literal(BASE64_CHARACTERS), data[i + 0]); - i32 b = index(string_literal(BASE64_CHARACTERS), data[i + 1]); - i32 c = index(string_literal(BASE64_CHARACTERS), data[i + 2]); - i32 d = index(string_literal(BASE64_CHARACTERS), data[i + 3]); + I32 a = index(string_literal(BASE64_CHARACTERS), data[i + 0]); + I32 b = index(string_literal(BASE64_CHARACTERS), data[i + 1]); + I32 c = index(string_literal(BASE64_CHARACTERS), data[i + 2]); + I32 d = index(string_literal(BASE64_CHARACTERS), data[i + 3]); - u32 value = (a << 18) | + U32 value = (a << 18) | (b << 12) | (c << 6) | (d); - for (u64 j = 0; j < 3; ++j) + for (U64 j = 0; j < 3; ++j) { char byte = (value >> ((2 - j) * 8)) & 0xFF; string_append(out, byte); } } - u64 padding = 0; + U64 padding = 0; for (char c : data) { if (c == '=') @@ -75,7 +75,7 @@ base64_decode(const String &data, memory::Allocator *allocator) } } - for (u64 i = 0; i < padding; ++i) + for (U64 i = 0; i < padding; ++i) { string_remove_last(out); } diff --git a/core/base64.h b/core/base64.h index 0d24c9d6..330cd0cb 
100644 --- a/core/base64.h +++ b/core/base64.h @@ -7,21 +7,21 @@ /* TODO: - - [ ] Should decode return a String or an Array? + - [ ] Should decode return a String or an Array? - [ ] Should use Result? */ CORE_API String -base64_encode(const u8 *data, u64 size, memory::Allocator *allocator = memory::heap_allocator()); +base64_encode(const U8 *data, U64 size, memory::Allocator *allocator = memory::heap_allocator()); inline static String base64_encode(const Block &data, memory::Allocator *allocator = memory::heap_allocator()) { - return base64_encode((const u8 *)data.data, data.size, allocator); + return base64_encode((const U8 *)data.data, data.size, allocator); } inline static String -base64_encode(const Array &data, memory::Allocator *allocator = memory::heap_allocator()) +base64_encode(const Array &data, memory::Allocator *allocator = memory::heap_allocator()) { return base64_encode(data.data, data.count, allocator); } @@ -29,7 +29,7 @@ base64_encode(const Array &data, memory::Allocator *allocator = memory::heap inline static String base64_encode(const String &data, memory::Allocator *allocator = memory::heap_allocator()) { - return base64_encode((const u8 *)data.data, data.count, allocator); + return base64_encode((const U8 *)data.data, data.count, allocator); } inline static String diff --git a/core/containers/array.h b/core/containers/array.h index 70e2b1c8..e1fdd34e 100644 --- a/core/containers/array.h +++ b/core/containers/array.h @@ -13,18 +13,18 @@ struct Array { memory::Allocator *allocator; T *data; - u64 count; - u64 capacity; + U64 count; + U64 capacity; - T & - operator[](u64 index) + inline T & + operator[](U64 index) { validate(index < count, "[ARRAY]: Access out of range."); return data[index]; } - const T & - operator[](u64 index) const + inline const T & + operator[](U64 index) const { validate(index < count, "[ARRAY]: Access out of range."); return data[index]; @@ -45,12 +45,12 @@ array_init(memory::Allocator *allocator = 
memory::heap_allocator()) template inline static Array -array_init_with_capacity(u64 capacity, memory::Allocator *allocator = memory::heap_allocator()) +array_init_with_capacity(U64 capacity, memory::Allocator *allocator = memory::heap_allocator()) { allocator = allocator ? allocator : memory::heap_allocator(); return Array { .allocator = allocator, - .data = (T *)memory::allocate(allocator, capacity * sizeof(T)), + .data = memory::allocate(allocator, capacity), .count = 0, .capacity = capacity }; @@ -58,12 +58,12 @@ array_init_with_capacity(u64 capacity, memory::Allocator *allocator = memory::he template inline static Array -array_init_with_count(u64 count, memory::Allocator *allocator = memory::heap_allocator()) +array_init_with_count(U64 count, memory::Allocator *allocator = memory::heap_allocator()) { allocator = allocator ? allocator : memory::heap_allocator(); return Array { .allocator = allocator, - .data = (T *)memory::allocate(allocator, count * sizeof(T)), + .data = memory::allocate(allocator, count), .count = count, .capacity = count }; @@ -91,7 +91,7 @@ inline static Array array_copy(const Array &self, memory::Allocator *allocator = memory::heap_allocator()) { Array copy = array_init_with_count(self.count, allocator); - for (u64 i = 0; i < self.count; ++i) + for (U64 i = 0; i < self.count; ++i) copy[i] = self[i]; return copy; } @@ -102,12 +102,12 @@ array_deinit(Array &self) { if (self.capacity && self.allocator) memory::deallocate(self.allocator, self.data); - self = Array{self.allocator}; + self = Array{.allocator = self.allocator}; } template inline static void -array_reserve(Array &self, u64 added_capacity) +array_reserve(Array &self, U64 added_capacity) { if (self.count + added_capacity < self.capacity) return; @@ -117,8 +117,8 @@ array_reserve(Array &self, u64 added_capacity) self.capacity += added_capacity; - T *data = (T *)memory::allocate(self.allocator, self.capacity * sizeof(T)); - for (u64 i = 0; i < self.count; ++i) + T *data = 
memory::allocate(self.allocator, self.capacity); + for (U64 i = 0; i < self.count; ++i) data[i] = self[i]; memory::deallocate(self.allocator, self.data); @@ -127,7 +127,7 @@ array_reserve(Array &self, u64 added_capacity) template inline static void -array_resize(Array &self, u64 new_count) +array_resize(Array &self, U64 new_count) { if (new_count > self.count) array_reserve(self, new_count - self.count); @@ -145,9 +145,9 @@ array_push(Array &self, const R &value) template inline static void -array_push(Array &self, const T &value, u64 count) +array_push(Array &self, const T &value, U64 count) { - u64 i = self.count; + U64 i = self.count; array_resize(self, self.count + count); for (; i < self.count; ++i) self[i] = value; @@ -165,7 +165,7 @@ array_pop(Array &self) template inline static void -array_remove(Array &self, u64 index) +array_remove(Array &self, U64 index) { validate(index < self.count, "[ARRAY]: Access out of range."); if ((index + 1) != self.count) @@ -181,7 +181,7 @@ template inline static void array_remove_if(Array &self, P &&predicate) { - for (u64 i = 0; i < self.count; ++i) + for (U64 i = 0; i < self.count; ++i) { if (predicate(self[i])) { @@ -193,7 +193,7 @@ array_remove_if(Array &self, P &&predicate) template inline static void -array_remove_ordered(Array &self, u64 index) +array_remove_ordered(Array &self, U64 index) { validate(index < self.count, "[ARRAY]: Access out of range."); ::memmove(self.data + index, self.data + index + 1, (self.count - index - 1) * sizeof(T)); @@ -204,7 +204,7 @@ template inline static void array_remove_ordered_if(Array &self, P &&predicate) { - for (u64 i = 0; i < self.count; ++i) + for (U64 i = 0; i < self.count; ++i) { if (predicate(self[i])) { @@ -218,9 +218,9 @@ template inline static void array_append(Array &self, const Array &other) { - u64 old_count = self.count; + U64 old_count = self.count; array_resize(self, self.count + other.count); - for (u64 i = 0; i < other.count; ++i) + for (U64 i = 0; i < other.count; 
++i) self[old_count + i] = other[i]; } @@ -228,7 +228,7 @@ template inline static void array_fill(Array &self, const R &value) { - for (u64 i = 0; i < self.count; ++i) + for (U64 i = 0; i < self.count; ++i) self[i] = (T)value; } @@ -248,7 +248,7 @@ array_is_empty(const Array &self) template inline static T & -array_first(Array &self) +array_front(Array &self) { validate(self.count > 0, "[ARRAY]: Count is 0."); return self[0]; @@ -256,7 +256,7 @@ array_first(Array &self) template inline static T & -array_last(Array &self) +array_back(Array &self) { validate(self.count > 0, "[ARRAY]: Count is 0."); return self[self.count - 1]; diff --git a/core/containers/hash_set.h b/core/containers/hash_set.h index 1abb91d1..bf05b4f8 100644 --- a/core/containers/hash_set.h +++ b/core/containers/hash_set.h @@ -26,7 +26,7 @@ hash_set_init(memory::Allocator *allocator = memory::heap_allocator()) template inline static Hash_Set -hash_set_init_with_capacity(u64 capacity, memory::Allocator *allocator = memory::heap_allocator()) +hash_set_init_with_capacity(U64 capacity, memory::Allocator *allocator = memory::heap_allocator()) { return hash_table_init_with_capacity(capacity, allocator); } @@ -57,7 +57,7 @@ hash_set_deinit(Hash_Set &self) template inline static void -hash_set_reserve(Hash_Set &self, u64 added_capacity) +hash_set_reserve(Hash_Set &self, U64 added_capacity) { hash_table_reserve(self, added_capacity); } diff --git a/core/containers/hash_table.h b/core/containers/hash_table.h index 039aec95..18d598a4 100644 --- a/core/containers/hash_table.h +++ b/core/containers/hash_table.h @@ -26,8 +26,8 @@ enum HASH_TABLE_SLOT_FLAGS struct Hash_Table_Slot { - u64 entry_index; - u64 hash_value; + U64 entry_index; + U64 hash_value; HASH_TABLE_SLOT_FLAGS flags; }; @@ -73,8 +73,8 @@ struct Hash_Table { Array slots; Array> entries; - u64 count; - u64 capacity; + U64 count; + U64 capacity; inline Hash_Table_Entry_Proxy operator[](const K &key) @@ -102,10 +102,10 @@ 
hash_table_init(memory::Allocator *allocator = memory::heap_allocator()) template inline static Hash_Table -hash_table_init_with_capacity(u64 capacity, memory::Allocator *allocator = memory::heap_allocator()) +hash_table_init_with_capacity(U64 capacity, memory::Allocator *allocator = memory::heap_allocator()) { Hash_Table self = { - .slots = array_init_with_count(capacity > 8 ? next_power_of_two((i32)capacity) : 8, allocator), + .slots = array_init_with_count(capacity > 8 ? next_power_of_two((I32)capacity) : 8, allocator), .entries = array_init>(allocator), .count = 0, .capacity = self.slots.count @@ -147,22 +147,22 @@ hash_table_deinit(Hash_Table &self) template inline static void -hash_table_reserve(Hash_Table &self, u64 added_capacity) +hash_table_reserve(Hash_Table &self, U64 added_capacity) { if (added_capacity == 0) return; - u64 new_capacity = self.count + added_capacity; + U64 new_capacity = self.count + added_capacity; if (new_capacity < self.slots.count) return; - array_resize(self.slots, next_power_of_two((i32)new_capacity)); + array_resize(self.slots, next_power_of_two((I32)new_capacity)); array_fill(self.slots, Hash_Table_Slot{}); - for (u64 i = 0; i < self.entries.count; ++i) + for (U64 i = 0; i < self.entries.count; ++i) { - u64 hash_value = hash(self.entries[i].key); - u64 slot_index = hash_value & (self.slots.count - 1); + U64 hash_value = hash(self.entries[i].key); + U64 slot_index = hash_value & (self.slots.count - 1); Hash_Table_Slot slot = self.slots[slot_index]; while (slot.flags == HASH_TABLE_SLOT_FLAGS_USED) { @@ -188,9 +188,9 @@ hash_table_find(const Hash_Table &self, const K &key) if (self.count == 0) return nullptr; - u64 hash_value = hash(key); - u64 slot_index = hash_value & (self.capacity - 1); - u64 start_slot_index = slot_index; + U64 hash_value = hash(key); + U64 slot_index = hash_value & (self.capacity - 1); + U64 start_slot_index = slot_index; Hash_Table_Slot slot = self.slots[slot_index]; while (true) { @@ -243,8 +243,8 @@ 
hash_table_insert(Hash_Table &self, const K &key, const V &value) else if (self.count + 1 > self.capacity - (self.capacity >> 2)) hash_table_reserve(self, self.capacity); - u64 hash_value = hash(key); - u64 slot_index = hash_value & (self.capacity - 1); + U64 hash_value = hash(key); + U64 slot_index = hash_value & (self.capacity - 1); Hash_Table_Slot slot = self.slots[slot_index]; while (slot.flags == HASH_TABLE_SLOT_FLAGS_USED) { @@ -283,10 +283,10 @@ template inline static bool hash_table_remove(Hash_Table &self, const K &key) { - constexpr auto find_slot_index = [](Hash_Table &self, const K &key) -> u64 { - u64 hash_value = hash(key); - u64 slot_index = hash_value & (self.capacity - 1); - u64 start_slot_index = slot_index; + constexpr auto find_slot_index = [](Hash_Table &self, const K &key) -> U64 { + U64 hash_value = hash(key); + U64 slot_index = hash_value & (self.capacity - 1); + U64 start_slot_index = slot_index; Hash_Table_Slot slot = self.slots[slot_index]; while (true) { @@ -321,11 +321,11 @@ hash_table_remove(Hash_Table &self, const K &key) if (self.count == 0) return false; - if (u64 slot_index = find_slot_index(self, key); slot_index != U64_MAX) + if (U64 slot_index = find_slot_index(self, key); slot_index != U64_MAX) { Hash_Table_Slot &slot = self.slots[slot_index]; if (slot.entry_index < self.entries.count - 1) - self.slots[find_slot_index(self, array_last(self.entries).key)].entry_index = slot.entry_index; + self.slots[find_slot_index(self, array_back(self.entries).key)].entry_index = slot.entry_index; array_remove(self.entries, slot.entry_index); slot.flags = HASH_TABLE_SLOT_FLAGS_DELETED; @@ -352,10 +352,10 @@ template inline static bool hash_table_remove_ordered(Hash_Table &self, const K &key) { - constexpr auto find_slot_index = [](Hash_Table &self, const K &key) -> u64 { - u64 hash_value = hash(key); - u64 slot_index = hash_value & (self.capacity - 1); - u64 start_slot_index = slot_index; + constexpr auto find_slot_index = [](Hash_Table &self, 
const K &key) -> U64 { + U64 hash_value = hash(key); + U64 slot_index = hash_value & (self.capacity - 1); + U64 start_slot_index = slot_index; Hash_Table_Slot slot = self.slots[slot_index]; while (true) { @@ -390,11 +390,11 @@ hash_table_remove_ordered(Hash_Table &self, const K &key) if (self.count == 0) return false; - if (u64 slot_index = find_slot_index(self, key); slot_index != U64_MAX) + if (U64 slot_index = find_slot_index(self, key); slot_index != U64_MAX) { Hash_Table_Slot &slot = self.slots[slot_index]; if (slot.entry_index < self.entries.count - 1) - for (u64 i = slot.entry_index + 1; i < self.entries.count; ++i) + for (U64 i = slot.entry_index + 1; i < self.entries.count; ++i) self.slots[find_slot_index(self, self.entries[i].key)].entry_index = i - 1; array_remove_ordered(self.entries, slot.entry_index); diff --git a/core/containers/ring_buffer.h b/core/containers/ring_buffer.h new file mode 100644 index 00000000..ebeb971f --- /dev/null +++ b/core/containers/ring_buffer.h @@ -0,0 +1,212 @@ +#pragma once + +#include "core/defines.h" +#include "core/validate.h" +#include "core/memory/memory.h" + +#include +#include + +template +struct Ring_Buffer +{ + memory::Allocator *allocator; + T *data; + U64 count; + U64 capacity; + U64 head; + + inline T & + operator[](U64 index) + { + validate(index < count, "[RING_BUFFER]: Access out of range."); + return data[(head + index) % capacity]; + } + + inline const T & + operator[](U64 index) const + { + validate(index < count, "[RING_BUFFER]: Access out of range."); + return data[(head + index) % capacity]; + } +}; + +template +inline static Ring_Buffer +ring_buffer_init(memory::Allocator *allocator = memory::heap_allocator()) +{ + return Ring_Buffer { + .allocator = allocator, + .data = nullptr, + .count = 0, + .capacity = 0, + .head = 0, + }; +} + +template +inline static Ring_Buffer +ring_buffer_copy(const Ring_Buffer &self, memory::Allocator *allocator = memory::heap_allocator()) +{ + Ring_Buffer copy = 
ring_buffer_init(allocator); + ring_buffer_reserve(copy, self.count); + copy.count = self.count; + for (U64 i = 0; i < self.count; ++i) + copy[i] = self[i]; + return copy; +} + +template +inline static void +ring_buffer_deinit(Ring_Buffer &self) +{ + if (self.data == nullptr) + return; + + memory::deallocate(self.allocator, self.data); + self = Ring_Buffer{.allocator = self.allocator}; +} + +template +inline static void +ring_buffer_reserve(Ring_Buffer &self, U64 added_capacity) +{ + if (self.count + added_capacity <= self.capacity) + return; + + if (self.allocator == nullptr) + self.allocator = memory::heap_allocator(); + + U64 next_cap = (U64)(self.capacity * 1.5f); + U64 needed_cap = self.count + added_capacity; + U64 new_capacity = next_cap > needed_cap ? next_cap : needed_cap; + + T *new_data = memory::allocate(self.allocator, new_capacity); + + if (self.count) + { + const U64 first_chunk = self.capacity - self.head; + if (first_chunk >= self.count) + { + ::memcpy(new_data, self.data + self.head, self.count * sizeof(T)); + } + else + { + ::memcpy(new_data, self.data + self.head, first_chunk * sizeof(T)); + ::memcpy(new_data + first_chunk, self.data, (self.count - first_chunk) * sizeof(T)); + } + } + + if (self.capacity) + memory::deallocate(self.allocator, self.data); + + self.data = new_data; + self.capacity = new_capacity; + self.head = 0; +} + +template +inline static void +ring_buffer_push_front(Ring_Buffer &self, const R &data) +{ + if (self.count == self.capacity) + ring_buffer_reserve(self, self.capacity ? 1 : 8); + self.head = self.head ? self.head - 1 : self.capacity - 1; + self.data[self.head] = data; + ++self.count; +} + +template +inline static void +ring_buffer_push_back(Ring_Buffer &self, const R &data) +{ + if (self.count == self.capacity) + ring_buffer_reserve(self, self.capacity ? 
1 : 8); + self.data[(self.head + self.count) % self.capacity] = data; + ++self.count; +} + +template +inline static void +ring_buffer_pop_front(Ring_Buffer &self) +{ + validate(self.count > 0, "[RING_BUFFER]: Count is 0."); + self.head = (self.head + 1) % self.capacity; + --self.count; +} + +template +inline static void +ring_buffer_pop_back(Ring_Buffer &self) +{ + validate(self.count > 0, "[RING_BUFFER]: Count is 0."); + --self.count; +} + +template +inline static T & +ring_buffer_front(Ring_Buffer &self) +{ + validate(self.count > 0, "[RING_BUFFER]: Count is 0."); + return self.data[self.head]; +} + +template +inline static const T & +ring_buffer_front(const Ring_Buffer &self) +{ + validate(self.count > 0, "[RING_BUFFER]: Count is 0."); + return self.data[self.head]; +} + +template +inline static T & +ring_buffer_back(Ring_Buffer &self) +{ + validate(self.count > 0, "[RING_BUFFER]: Count is 0."); + return self.data[(self.head + self.count - 1) % self.capacity]; +} + +template +inline static const T & +ring_buffer_back(const Ring_Buffer &self) +{ + validate(self.count > 0, "[RING_BUFFER]: Count is 0."); + return self.data[(self.head + self.count - 1) % self.capacity]; +} + +template +inline static void +ring_buffer_clear(Ring_Buffer &self) +{ + self.head = 0; + self.count = 0; +} + +template +inline static bool +ring_buffer_is_empty(const Ring_Buffer &self) +{ + return self.count == 0; +} + +template +inline static Ring_Buffer +clone(const Ring_Buffer &self, memory::Allocator *allocator = memory::heap_allocator()) +{ + Ring_Buffer copy = ring_buffer_copy(self, allocator); + if constexpr (std::is_class_v) + for (U64 i = 0; i < self.count; ++i) + copy[i] = clone(copy[i]); + return copy; +} + +template +inline static void +destroy(Ring_Buffer &self) +{ + if constexpr (std::is_class_v) + for (U64 i = 0; i < self.count; ++i) + destroy(self[i]); + ring_buffer_deinit(self); +} \ No newline at end of file diff --git a/core/containers/span.h b/core/containers/span.h new 
file mode 100644 index 00000000..78024181 --- /dev/null +++ b/core/containers/span.h @@ -0,0 +1,216 @@ +#pragma once + +#include "core/defines.h" +#include "core/validate.h" +#include "core/containers/array.h" +#include "core/containers/stack_array.h" + +#include +#include + +/* +TODO: +- [ ] Consider naming _init to _from, since we don't allocate memory. +*/ + +template +struct Span +{ + T *data; + U64 count; + + // Default — empty span. + Span() : data(nullptr), count(0) {} + + // Trivially copyable / movable — explicit so our converting constructors + // below don't inhibit the compiler-synthesized copy. + Span(const Span &) = default; + Span(Span &&) = default; + Span &operator=(const Span &) = default; + Span &operator=(Span &&) = default; + + // Raw pointer + count. + Span(T *data_, U64 count_) : data(data_), count(count_) {} + + // Half-open [begin, end) range. + Span(T *begin_, T *end_) : data(begin_), count(U64(end_ - begin_)) {} + + // C-style array. + template + Span(T (&array)[N]) : data(array), count(N) {} + + // Braced / initializer list — wraps const storage (initializer_list::begin() is const T *). + // Only participates in overload resolution when T is `const U`. + Span(std::initializer_list> list) requires std::is_const_v + : data(list.begin()), count(list.size()) {} + + // From the engine's containers. + Span(Array &array) : data(array.data), count(array.count) {} + + template + Span(Stack_Array &array) : data(array.data), count(array.count) {} + + // Single element — span of size 1 over the given value's storage. The caller + // must keep the referenced value alive for the span's lifetime. + Span(T &value) : data(&value), count(1) {} + + // Convert Span to Span (the mutable → const view). Only enabled + // when T is `const U` so this doesn't shadow the copy constructor. 
+ Span(const Span> &other) requires std::is_const_v + : data(other.data), count(other.count) {} + + inline T & + operator[](U64 index) + { + validate(index < count, "[SPAN]: Access out of range."); + return data[index]; + } + + inline const T & + operator[](U64 index) const + { + validate(index < count, "[SPAN]: Access out of range."); + return data[index]; + } +}; + +// Deduction guides so `Span{data, count}` and friends pick up T without a user annotation. +template Span(T *, U64) -> Span; +template Span(T *, T *) -> Span; +template Span(T (&)[N]) -> Span; +template Span(Array &) -> Span; +template Span(Stack_Array &) -> Span; +template Span(T &) -> Span; +template Span(std::initializer_list) -> Span; + +// ---- span_init helpers (retained for existing call sites) ------------------ + +template +inline static Span +span_init(T *data, U64 count) +{ + return Span(data, count); +} + +template +inline static Span +span_init(T *begin, T *end) +{ + return Span(begin, end); +} + +template +inline static Span +span_init(T (&array)[N]) +{ + return Span(array); +} + +template +inline static Span +span_init(std::initializer_list list) +{ + return Span(list); +} + +template +inline static Span +span_init(Array &array) +{ + return Span(array); +} + +template +inline static Span +span_init(Stack_Array &array) +{ + return Span(array); +} + +template +inline static Span +span_init(T &value) +{ + return Span(value); +} + +inline static Span +span_init(const char *string) +{ + constexpr auto c_string_length = [](const char *str) -> U64 { + U64 length = 0; + while (str[length] != '\0') + ++length; + return length; + }; + return Span(string, c_string_length(string)); +} + +// ---- Accessors / iteration ------------------------------------------------- + +template +inline static bool +span_is_empty(const Span &self) +{ + return self.count == 0; +} + +template +inline static T & +span_first(Span &self) +{ + validate(self.count > 0, "[SPAN]: Count is 0."); + return self[0]; +} + 
+template +inline static const T & +span_first(const Span &self) +{ + validate(self.count > 0, "[SPAN]: Count is 0."); + return self[0]; +} + +template +inline static T & +span_last(Span &self) +{ + validate(self.count > 0, "[SPAN]: Count is 0."); + return self[self.count - 1]; +} + +template +inline static const T & +span_last(const Span &self) +{ + validate(self.count > 0, "[SPAN]: Count is 0."); + return self[self.count - 1]; +} + +template +inline static T * +begin(Span &self) +{ + return self.data; +} + +template +inline static const T * +begin(const Span &self) +{ + return self.data; +} + +template +inline static T * +end(Span &self) +{ + return self.data + self.count; +} + +template +inline static const T * +end(const Span &self) +{ + return self.data + self.count; +} diff --git a/core/containers/stack_array.h b/core/containers/stack_array.h index e244c368..01bfc3c2 100644 --- a/core/containers/stack_array.h +++ b/core/containers/stack_array.h @@ -5,11 +5,11 @@ #include -template +template struct Stack_Array { T data[N]; - u64 count; + U64 count; Stack_Array() : data(), count(0) {} @@ -19,7 +19,7 @@ struct Stack_Array count = N; } - template + template Stack_Array(const T (&values)[NN]) { ::memcpy(data, values, sizeof(T) * NN); @@ -27,21 +27,21 @@ struct Stack_Array } T & - operator[](u64 index) + operator[](U64 index) { validate(index < count, "[STACK_ARRAY]: Access out of range."); return data[index]; } const T & - operator[](u64 index) const + operator[](U64 index) const { validate(index < count, "[STACK_ARRAY]: Access out of range."); return data[index]; } }; -template +template inline static void stack_array_push(Stack_Array &self, const T &value) { @@ -49,7 +49,7 @@ stack_array_push(Stack_Array &self, const T &value) self.data[self.count++] = value; } -template +template inline static T stack_array_pop(Stack_Array &self) { @@ -57,35 +57,35 @@ stack_array_pop(Stack_Array &self) return self.data[--self.count]; } -template +template inline static void 
stack_array_clear(Stack_Array &self) { self.count = 0; } -template +template inline static const T * begin(const Stack_Array &self) { return self.data; } -template +template inline static T * begin(Stack_Array &self) { return self.data; } -template +template inline static const T * end(const Stack_Array &self) { return self.data + self.count; } -template +template inline static T * end(Stack_Array &self) { diff --git a/core/containers/string.h b/core/containers/string.h index 36808d9e..42feca5e 100644 --- a/core/containers/string.h +++ b/core/containers/string.h @@ -19,7 +19,7 @@ string_init(memory::Allocator *allocator = memory::heap_allocator()) // TODO: Unit test for null character if string is copied using for loop. inline static String -string_with_capacity(u64 capacity, memory::Allocator *allocator = memory::heap_allocator()) +string_with_capacity(U64 capacity, memory::Allocator *allocator = memory::heap_allocator()) { String self = array_init_with_capacity(capacity, allocator); self.data[0] = '\0'; @@ -29,17 +29,17 @@ string_with_capacity(u64 capacity, memory::Allocator *allocator = memory::heap_a inline static String string_from(const char *c_string, memory::Allocator *allocator = memory::heap_allocator()) { - auto length_of = [](const char *string) -> u64 { - u64 count = 0; + auto length_of = [](const char *string) -> U64 { + U64 count = 0; const char *ptr = string; while (*ptr++) ++count; return count; }; - u64 length = c_string == nullptr ? 0 : length_of(c_string); + U64 length = c_string == nullptr ? 
0 : length_of(c_string); String self = array_init_with_capacity(length + 1, allocator); self.count = length; - for (u64 i = 0; i < length; ++i) + for (U64 i = 0; i < length; ++i) self[i] = c_string[i]; self.data[self.count] = '\0'; return self; @@ -69,8 +69,8 @@ string_copy(const String &self, memory::Allocator *allocator = memory::heap_allo inline static String string_literal(const char *c_string) { - auto length_of = [](const char *string) -> u64 { - u64 count = 0; + auto length_of = [](const char *string) -> U64 { + U64 count = 0; const char *ptr = string; while (*ptr++) ++count; return count; @@ -90,13 +90,13 @@ string_deinit(String &self) } inline static void -string_reserve(String& self, u64 added_capacity) +string_reserve(String& self, U64 added_capacity) { array_reserve(self, added_capacity); } inline static void -string_resize(String& self, u64 new_count) +string_resize(String& self, U64 new_count) { array_resize(self, new_count + 1); --self.count; @@ -119,7 +119,7 @@ string_append(String &self, char c) } inline static void -string_append(String &self, char c, i32 count) +string_append(String &self, char c, I32 count) { if (count == 0) return; @@ -154,7 +154,7 @@ string_to_lowercase(char c) inline static String & string_to_lowercase(String &self) { - for (u64 i = 0; i < self.count; ++i) + for (U64 i = 0; i < self.count; ++i) self[i] = string_to_lowercase(self[i]); return self; } @@ -170,29 +170,29 @@ string_to_uppercase(char c) inline static String & string_to_uppercase(String &self) { - for (u64 i = 0; i < self.count; ++i) + for (U64 i = 0; i < self.count; ++i) self[i] = string_to_uppercase(self[i]); return self; } -inline static u64 -string_find_first_of(const String &self, const String &to_find, u64 start = 0) +inline static U64 +string_find_first_of(const String &self, const String &to_find, U64 start = 0) { if (self.count == 0 || to_find.count > self.count || to_find.count == 0 || start >= self.count) - return u64(-1); + return U64(-1); - u64 index = 
u64(-1); - for (u64 i = start; i < self.count; ++i) + U64 index = U64(-1); + for (U64 i = start; i < self.count; ++i) { if (self[i] != to_find[0]) continue; else index = i; - for (u64 c = 0; c < to_find.count; ++c) + for (U64 c = 0; c < to_find.count; ++c) { if (i + c >= self.count) - return u64(-1); + return U64(-1); if (self[i + c] != to_find[c]) break; @@ -202,61 +202,61 @@ string_find_first_of(const String &self, const String &to_find, u64 start = 0) } } - return u64(-1); + return U64(-1); } -inline static u64 -string_find_first_of(const String &self, const char *to_find, u64 start = 0) +inline static U64 +string_find_first_of(const String &self, const char *to_find, U64 start = 0) { return string_find_first_of(self, string_literal(to_find), start); } -inline static u64 -string_find_first_of(const char *c_string, const String &to_find, u64 start = 0) +inline static U64 +string_find_first_of(const char *c_string, const String &to_find, U64 start = 0) { return string_find_first_of(string_literal(c_string), to_find, start); } -inline static u64 -string_find_first_of(const char *c_string, const char *to_find, u64 start = 0) +inline static U64 +string_find_first_of(const char *c_string, const char *to_find, U64 start = 0) { return string_find_first_of(string_literal(c_string), string_literal(to_find), start); } -inline static u64 -string_find_first_of(const String &self, char c, u64 start = 0) +inline static U64 +string_find_first_of(const String &self, char c, U64 start = 0) { if (start >= self.count) - return u64(-1); + return U64(-1); - for (u64 i = start; i < self.count; ++i) + for (U64 i = start; i < self.count; ++i) if (self[i] == c) return i; - return u64(-1); + return U64(-1); } -inline static u64 -string_find_first_of(const char *c_string, char c, u64 start = 0) +inline static U64 +string_find_first_of(const char *c_string, char c, U64 start = 0) { return string_find_first_of(string_literal(c_string), c, start); } -inline static u64 +inline static U64 
string_find_last_of(const String &self, const String &to_find) { if (self.count == 0 || to_find.count > self.count || to_find.count == 0) - return u64(-1); + return U64(-1); - u64 index = u64(-1); - for (u64 i = self.count - to_find.count; i != u64(-1); --i) + U64 index = U64(-1); + for (U64 i = self.count - to_find.count; i != U64(-1); --i) { if (self[i] != to_find[0]) continue; else index = i; - for (u64 c = 0; c < to_find.count; ++c) + for (U64 c = 0; c < to_find.count; ++c) { if (self[i + c] != to_find[c]) break; @@ -266,36 +266,36 @@ string_find_last_of(const String &self, const String &to_find) } } - return u64(-1); + return U64(-1); } -inline static u64 +inline static U64 string_find_last_of(const String &self, const char *to_find) { return string_find_last_of(self, string_literal(to_find)); } -inline static u64 +inline static U64 string_find_last_of(const char *c_string, const String &to_find) { return string_find_last_of(string_literal(c_string), to_find); } -inline static u64 +inline static U64 string_find_last_of(const char *c_string, const char *to_find) { return string_find_last_of(string_literal(c_string), string_literal(to_find)); } -inline static u64 +inline static U64 string_find_last_of(const String &self, char c) { - for (u64 i = self.count - 1; i != u64(-1); --i) + for (U64 i = self.count - 1; i != U64(-1); --i) { if (self[i] == c) return i; } - return u64(-1); + return U64(-1); } inline static bool @@ -306,12 +306,12 @@ string_contains(const String &self, const String &other, bool case_insensitive = if (case_insensitive) { - for (u64 i = 0; i < self.count; ++i) + for (U64 i = 0; i < self.count; ++i) { if (string_to_lowercase(self[i]) != string_to_lowercase(other[0])) continue; - for (u64 c = 0; c < other.count; ++c) + for (U64 c = 0; c < other.count; ++c) { if (string_to_lowercase(self[i + c]) != string_to_lowercase(other[c])) break; @@ -323,12 +323,12 @@ string_contains(const String &self, const String &other, bool case_insensitive = } else { 
- for (u64 i = 0; i < self.count; ++i) + for (U64 i = 0; i < self.count; ++i) { if (self[i] != other[0]) continue; - for (u64 c = 0; c < other.count; ++c) + for (U64 c = 0; c < other.count; ++c) { if (self[i + c] != other[c]) break; @@ -363,7 +363,7 @@ string_contains(const char *c_string, const char *other, bool case_insensitive = inline static bool string_contains(const String &self, char c, bool case_insensitive = false) { - for (u64 i = 0; i < self.count; ++i) + for (U64 i = 0; i < self.count; ++i) if ((case_insensitive ? string_to_lowercase(self[i]) : self[i]) == c) return true; return false; @@ -384,7 +384,7 @@ string_starts_with(const String &self, const String &prefix) if(self.count < prefix.count) return false; - for (u64 i = 0; i < prefix.count; ++i) + for (U64 i = 0; i < prefix.count; ++i) if (self[i] != prefix[i]) return false; @@ -424,7 +424,7 @@ string_ends_with(const String &self, const String &suffix) if(self.count < suffix.count) return false; - for (u64 i = 0; i < suffix.count; ++i) + for (U64 i = 0; i < suffix.count; ++i) if (self[self.count - suffix.count + i] != suffix[i]) return false; @@ -460,9 +460,9 @@ string_remove_last(String &self) inline static void string_trim(String &self, const String &to_trim) { - u64 substring_start = 0; - u64 substring_length = self.count; - for (u64 i = 0; i < self.count; ++i) + U64 substring_start = 0; + U64 substring_length = self.count; + for (U64 i = 0; i < self.count; ++i) { if (substring_length == 0) return; @@ -474,7 +474,7 @@ string_trim(String &self, const String &to_trim) --substring_length; } - for (u64 i = self.count - 1; i > 0; --i) + for (U64 i = self.count - 1; i > 0; --i) { if (substring_length == 0) return; @@ -497,9 +497,9 @@ string_trim(String &self, const char *to_trim) inline static void string_trim_left(String &self, const String &to_trim) { - u64 substring_start = 0; - u64 substring_length = self.count; - for (u64 i = 0; i < self.count; ++i) + U64 substring_start = 0; + U64 substring_length 
= self.count; + for (U64 i = 0; i < self.count; ++i) { if (substring_length == 0) return; @@ -525,9 +525,9 @@ string_trim_left(String &self, const char *to_trim) inline static void string_trim_right(String &self, const String &to_trim) { - u64 substring_start = 0; - u64 substring_length = self.count; - for (u64 i = self.count - 1; i > 0; --i) + U64 substring_start = 0; + U64 substring_length = self.count; + for (U64 i = self.count - 1; i > 0; --i) { if (substring_length == 0) return; @@ -558,9 +558,9 @@ string_split(const String &self, char delimeter, bool skip_empty = true, memory: { Array splits = array_init(allocator); - u64 current = 0; - u64 index = 0; - while((index = string_find_first_of(self, delimeter, current)) != u64(-1)) + U64 current = 0; + U64 index = 0; + while((index = string_find_first_of(self, delimeter, current)) != U64(-1)) { if ((index - current) != 0 || skip_empty == false) array_push(splits, string_from(self.data + current, self.data + index, allocator)); @@ -582,9 +582,9 @@ string_split(const String &self, const String &delimeter, bool skip_empty = true { Array splits = array_init(allocator); - u64 current = 0; - u64 index = 0; - while((index = string_find_first_of(self, delimeter, current)) != u64(-1)) + U64 current = 0; + U64 index = 0; + while((index = string_find_first_of(self, delimeter, current)) != U64(-1)) { if ((index - current) != 0 || skip_empty == false) array_push(splits, string_from(self.data + current, self.data + index, allocator)); @@ -616,7 +616,7 @@ string_split(const char *c_string, const char *delimeter, bool skip_empty = true inline static void string_replace(String &self, char to_replace, char replacement) { - for(u64 i = 0; i < self.count; ++i) + for(U64 i = 0; i < self.count; ++i) if(self[i] == to_replace) self[i] = replacement; } @@ -631,7 +631,7 @@ string_replace(String &self, const String &to_replace, const String &replacement if (string_starts_with(self, to_replace)) string_append(copy, replacement); - for (u64 i 
= 0; i < splits.count; ++i) + for (U64 i = 0; i < splits.count; ++i) { string_append(copy, splits[i]); if (i != splits.count - 1) @@ -667,10 +667,10 @@ string_replace(String &self, const char *to_replace, const char *replacement) // Add string_replace(String &, const char *, char); // Add start index to replace and other suited functions. inline static void -string_replace_first_occurance(String &self, const String &to_replace, const String &replacement, u64 start = 0) +string_replace_first_occurance(String &self, const String &to_replace, const String &replacement, U64 start = 0) { - u64 index = string_find_first_of(self, to_replace, start); - if (index != u64(-1)) + U64 index = string_find_first_of(self, to_replace, start); + if (index != U64(-1)) { String temp = string_from(begin(self) + index + to_replace.count, end(self), memory::temp_allocator()); string_resize(self, index); @@ -680,19 +680,19 @@ string_replace_first_occurance(String &self, const String &to_replace, const Str } inline static void -string_replace_first_occurance(String &self, const char *to_replace, const String &replacement, u64 start = 0) +string_replace_first_occurance(String &self, const char *to_replace, const String &replacement, U64 start = 0) { string_replace_first_occurance(self, string_literal(to_replace), replacement, start); } inline static void -string_replace_first_occurance(String &self, const String &to_replace, const char *replacement, u64 start = 0) +string_replace_first_occurance(String &self, const String &to_replace, const char *replacement, U64 start = 0) { string_replace_first_occurance(self, to_replace, string_literal(replacement), start); } inline static void -string_replace_first_occurance(String &self, const char *to_replace, const char *replacement, u64 start = 0) +string_replace_first_occurance(String &self, const char *to_replace, const char *replacement, U64 start = 0) { string_replace_first_occurance(self, string_literal(to_replace), string_literal(replacement), 
start); } @@ -709,7 +709,7 @@ operator==(const String &self, const String &other) if (self.count != other.count) return false; - for (u64 i = 0; i < self.count; ++i) + for (U64 i = 0; i < self.count; ++i) if (self[i] != other[i]) return false; @@ -758,7 +758,7 @@ destroy(String &self) string_deinit(self); } -inline static u64 +inline static U64 hash(const String &self) { return hash_fnv_x32(self.data, self.count); diff --git a/core/containers/string_interner.cpp b/core/containers/string_interner.cpp index 8f074e50..f0ece81e 100644 --- a/core/containers/string_interner.cpp +++ b/core/containers/string_interner.cpp @@ -36,7 +36,7 @@ string_interner_intern(String_Interner &self, const char *begin, const char *end } const char * -string_interner_intern(String_Interner &self, const char *begin, u64 count) +string_interner_intern(String_Interner &self, const char *begin, U64 count) { return string_interner_intern(self, begin, begin + count); } \ No newline at end of file diff --git a/core/containers/string_interner.h b/core/containers/string_interner.h index 9e3e538c..d5fe00b9 100644 --- a/core/containers/string_interner.h +++ b/core/containers/string_interner.h @@ -26,4 +26,4 @@ CORE_API const char * string_interner_intern(String_Interner &self, const char *begin, const char *end); CORE_API const char * -string_interner_intern(String_Interner &self, const char *begin, u64 count); \ No newline at end of file +string_interner_intern(String_Interner &self, const char *begin, U64 count); \ No newline at end of file diff --git a/core/defines.h b/core/defines.h index a6a4fc4b..9d7bc4cc 100644 --- a/core/defines.h +++ b/core/defines.h @@ -78,21 +78,18 @@ #define F64_MIN DBL_MIN #define F64_MAX DBL_MAX -typedef int8_t i8; -typedef int16_t i16; -typedef int32_t i32; -typedef int64_t i64; +typedef int8_t I8; +typedef int16_t I16; +typedef int32_t I32; +typedef int64_t I64; -typedef uint8_t u8; -typedef uint16_t u16; -typedef uint32_t u32; -typedef uint64_t u64; +typedef uint8_t 
U8; +typedef uint16_t U16; +typedef uint32_t U32; +typedef uint64_t U64; -typedef float f32; -typedef double f64; - -typedef intptr_t iptr; -typedef uintptr_t uptr; +typedef float F32; +typedef double F64; template class Template> struct is_specialization : std::false_type {}; @@ -106,7 +103,7 @@ concept is_specialization_v = is_specialization::value; template struct is_bounded_char_array : std::false_type {}; -template +template struct is_bounded_char_array : std::true_type {}; template @@ -128,7 +125,7 @@ namespace memory { struct Allocator; } struct Block { void *data; - u64 size; + U64 size; }; template @@ -146,8 +143,8 @@ destroy(T &) static_assert(sizeof(T) == 0, "There is no `void destroy(T &)` function overload defined for this type."); } -template -inline static u64 +template +inline static U64 count_of(const T (&)[N]) { return N; diff --git a/core/ecs.cpp b/core/ecs.cpp index 8009a9a3..025788f7 100644 --- a/core/ecs.cpp +++ b/core/ecs.cpp @@ -7,7 +7,7 @@ namespace ecs Entity entity_new() { - static std::atomic id = 0; + static std::atomic id = 0; return Entity{id.fetch_add(1)}; } } \ No newline at end of file diff --git a/core/ecs.h b/core/ecs.h index 6aa0c5dc..3535e175 100644 --- a/core/ecs.h +++ b/core/ecs.h @@ -13,7 +13,7 @@ namespace ecs { struct Entity { - u64 id = U64_MAX; + U64 id = U64_MAX; bool operator==(Entity other) const @@ -31,7 +31,7 @@ namespace ecs CORE_API Entity entity_new(); - typedef u64 Component_Hash; + typedef U64 Component_Hash; struct IComponent_Table { @@ -51,13 +51,13 @@ namespace ecs struct Component_Table : IComponent_Table { memory::Pool_Allocator *pool; - Hash_Table components; + Hash_Table components; inline static const char *table_name = typeid(T).name(); Component_Table() { pool = memory::pool_allocator_init(sizeof(T), 64); - components = hash_table_init(); + components = hash_table_init(); } ~Component_Table() override @@ -129,7 +129,7 @@ namespace ecs const T * read(Entity e) { - const auto [_, v] = 
*hash_table_find(component_tables, (u64)typeid(T).hash_code()); + const auto [_, v] = *hash_table_find(component_tables, (U64)typeid(T).hash_code()); return (const T *)v->read(e); } @@ -137,7 +137,7 @@ namespace ecs T * write(Entity e) { - auto [_, v] = *hash_table_find(component_tables, (u64)typeid(T).hash_code()); + auto [_, v] = *hash_table_find(component_tables, (U64)typeid(T).hash_code()); return (T *)v->write(e); } @@ -145,7 +145,7 @@ namespace ecs void remove(Entity e) { - auto [_, v] = *hash_table_find(component_tables, (u64)typeid(T).hash_code()); + auto [_, v] = *hash_table_find(component_tables, (U64)typeid(T).hash_code()); v->remove(e); } @@ -153,13 +153,13 @@ namespace ecs Array list() { - Array entities[sizeof...(TArgs)] = { hash_table_find(component_tables, (u64)typeid(TArgs).hash_code())->value->entities()... }; - u64 min_idx = u64(-1); - for (u64 i = 0; i < sizeof...(TArgs); ++i) + Array entities[sizeof...(TArgs)] = { hash_table_find(component_tables, (U64)typeid(TArgs).hash_code())->value->entities()... }; + U64 min_idx = U64(-1); + for (U64 i = 0; i < sizeof...(TArgs); ++i) if (entities[i].count < min_idx) min_idx = i; - IComponent_Table *tables[sizeof...(TArgs)] = { hash_table_find(component_tables, (u64)typeid(TArgs).hash_code())->value... }; + IComponent_Table *tables[sizeof...(TArgs)] = { hash_table_find(component_tables, (U64)typeid(TArgs).hash_code())->value... 
}; Array res = array_copy(entities[min_idx], memory::temp_allocator()); for (auto table : tables) array_remove_if(res, [table](Entity e) { return table->read(e) == nullptr; }); @@ -205,7 +205,7 @@ namespace ecs inline static void ecs_add_table(ECS &self) { - hash_table_insert(self.component_tables, (u64)typeid(T).hash_code(), (IComponent_Table *)memory::allocate_and_call_constructor>()); + hash_table_insert(self.component_tables, (U64)typeid(T).hash_code(), (IComponent_Table *)memory::allocate_and_call_constructor>()); } template diff --git a/core/formatter.h b/core/formatter.h index 65ad1fcd..b90dab47 100644 --- a/core/formatter.h +++ b/core/formatter.h @@ -5,6 +5,26 @@ #include "core/containers/string.h" #include "core/containers/hash_table.h" +#include "core/math/f32x2.h" +#include "core/math/f32x3.h" +#include "core/math/f32x4.h" +#include "core/math/f32x2x2.h" +#include "core/math/f32x3x3.h" +#include "core/math/f32x4x4.h" +#include "core/math/f64x2.h" +#include "core/math/f64x3.h" +#include "core/math/f64x4.h" +#include "core/math/f64x2x2.h" +#include "core/math/f64x3x3.h" +#include "core/math/f64x4x4.h" +#include "core/math/i32x2.h" +#include "core/math/i32x3.h" +#include "core/math/i32x4.h" +#include "core/math/u32x2.h" +#include "core/math/u32x3.h" +#include "core/math/u32x4.h" +#include "core/math/quaternion.h" + #include /* @@ -13,6 +33,14 @@ - [ ] Compile time check string format. - [ ] Use formatting in validate messages. - [ ] Cleanup. + - [ ] Refactor math-type formatters out of formatter.h. They're bundled here + for expedience while the math library lands, but ideally formatter.h + shouldn't depend on every math header (adds compile time for every + log-using TU). Options to explore: (a) opt-in core/math/format.h + companion header included by callers who want math logging, + (b) a trait-based customization point so any type with a + to_string()-like member can format itself, or (c) forward-only + declarations with definitions in the math TUs. 
*/ enum Format_Specifier @@ -44,8 +72,8 @@ struct Format_Options { Format_Specifier specifier = FORMAT_SPECIFIER_NONE; Format_Alignment alignment = FORMAT_ALIGNMENT_NONE; - u32 width = 0; - u32 precision = 6; + U32 width = 0; + U32 precision = 6; bool zero_pad = false; bool remove_trailing_zeros = true; }; @@ -85,7 +113,7 @@ format_apply_width_alignment(Formatter &self, const String &content, const Forma return; } - u64 padding = options.width - content.count; + U64 padding = options.width - content.count; // Special handling for zero-padding with negative numbers or prefixes if (options.zero_pad && (options.alignment == FORMAT_ALIGNMENT_NONE || options.alignment == FORMAT_ALIGNMENT_RIGHT)) @@ -110,19 +138,19 @@ format_apply_width_alignment(Formatter &self, const String &content, const Forma string_append(self.buffer, content[1]); string_append(self.buffer, content[2]); // Output padding - for (u64 i = 0; i < padding; ++i) + for (U64 i = 0; i < padding; ++i) string_append(self.buffer, '0'); // Output rest of content - for (u64 i = 3; i < content.count; ++i) + for (U64 i = 3; i < content.count; ++i) string_append(self.buffer, content[i]); } else { // Output padding - for (u64 i = 0; i < padding; ++i) + for (U64 i = 0; i < padding; ++i) string_append(self.buffer, '0'); // Output rest of content (skip the sign we already added) - for (u64 i = 1; i < content.count; ++i) + for (U64 i = 1; i < content.count; ++i) string_append(self.buffer, content[i]); } } @@ -132,16 +160,16 @@ format_apply_width_alignment(Formatter &self, const String &content, const Forma string_append(self.buffer, content[0]); string_append(self.buffer, content[1]); // Output padding - for (u64 i = 0; i < padding; ++i) + for (U64 i = 0; i < padding; ++i) string_append(self.buffer, '0'); // Output rest of content - for (u64 i = 2; i < content.count; ++i) + for (U64 i = 2; i < content.count; ++i) string_append(self.buffer, content[i]); } else { // No sign or prefix, just pad normally - for (u64 i = 0; i 
< padding; ++i) + for (U64 i = 0; i < padding; ++i) string_append(self.buffer, '0'); string_append(self.buffer, content); } @@ -155,24 +183,24 @@ format_apply_width_alignment(Formatter &self, const String &content, const Forma { // Left align: content then padding string_append(self.buffer, content); - for (u64 i = 0; i < padding; ++i) + for (U64 i = 0; i < padding; ++i) string_append(self.buffer, pad_char); } else if (options.alignment == FORMAT_ALIGNMENT_CENTER) { // Center align: padding/2, content, padding/2 - u64 left_pad = padding / 2; - u64 right_pad = padding - left_pad; - for (u64 i = 0; i < left_pad; ++i) + U64 left_pad = padding / 2; + U64 right_pad = padding - left_pad; + for (U64 i = 0; i < left_pad; ++i) string_append(self.buffer, pad_char); string_append(self.buffer, content); - for (u64 i = 0; i < right_pad; ++i) + for (U64 i = 0; i < right_pad; ++i) string_append(self.buffer, pad_char); } else // FORMAT_ALIGNMENT_RIGHT or NONE (default right for numbers) { // Right align: padding then content - for (u64 i = 0; i < padding; ++i) + for (U64 i = 0; i < padding; ++i) string_append(self.buffer, pad_char); string_append(self.buffer, content); } @@ -181,7 +209,7 @@ format_apply_width_alignment(Formatter &self, const String &content, const Forma template requires (std::is_integral_v && !std::is_floating_point_v) inline static String -format(Formatter &self, T data, u8 base = 10, bool uppercase = false) +format(Formatter &self, T data, U8 base = 10, bool uppercase = false) { const char *digits = uppercase ? 
"0123456789ABCDEF" : "0123456789abcdef"; @@ -194,10 +222,10 @@ format(Formatter &self, T data, u8 base = 10, bool uppercase = false) } char temp[64] = {}; - u64 count = 0; + U64 count = 0; do { - temp[count++] = digits[(uptr)(data % base)]; + temp[count++] = digits[(U64)(data % base)]; data = (T)(data / base); } while (data != 0); @@ -213,7 +241,7 @@ format(Formatter &self, T data, u8 base = 10, bool uppercase = false) // Pad to at least 2 digits for hex if (count < 2) { - for (u64 i = count; i < 2; ++i) + for (U64 i = count; i < 2; ++i) string_append(self.buffer, '0'); } } @@ -229,7 +257,7 @@ format(Formatter &self, T data, u8 base = 10, bool uppercase = false) } // for base == 10 there's no prefix; sign already emitted - for (i64 i = count - 1; i >= 0; --i) + for (I64 i = count - 1; i >= 0; --i) string_append(self.buffer, temp[i]); return self.buffer; @@ -244,7 +272,7 @@ format(Formatter &self, T data, const Format_Options &options) Formatter temp = formatter_init(self.buffer.allocator); DEFER(formatter_deinit(temp)); - u8 base = 10; + U8 base = 10; bool uppercase = false; switch (options.specifier) @@ -302,7 +330,7 @@ format(Formatter &self, T data, const Format_Options &options) template requires (std::is_floating_point_v) inline static String -format(Formatter &self, T data, u32 precision = 6, bool remove_trailing_zeros = true) +format(Formatter &self, T data, U32 precision = 6, bool remove_trailing_zeros = true) { if (data < 0) { @@ -310,15 +338,15 @@ format(Formatter &self, T data, u32 precision = 6, bool remove_trailing_zeros = data = -data; } - u64 integer = (u64)data; - f64 fraction = data - integer; - format(self, (u64)integer); + U64 integer = (U64)data; + F64 fraction = data - integer; + format(self, (U64)integer); string_append(self.buffer, '.'); - for (u64 i = 0; i < precision; ++i) + for (U64 i = 0; i < precision; ++i) { fraction *= 10; - integer = (u64)fraction; + integer = (U64)fraction; format(self, integer); fraction = fraction - integer; } @@ 
-395,7 +423,7 @@ format(Formatter &self, char data, const Format_Options &options) else { // Format as integer - format(self, (u8)data, options); + format(self, (U8)data, options); } return self.buffer; } @@ -405,7 +433,7 @@ requires (std::is_pointer_v && !is_c_string_v) inline static String format(Formatter &self, const T &data) { - return format(self, (uptr)data, 16, false); + return format(self, (U64)data, 16, false); } template @@ -417,7 +445,7 @@ format(Formatter &self, const T &data, const Format_Options &options) DEFER(formatter_deinit(temp)); bool uppercase = (options.specifier == FORMAT_SPECIFIER_POINTER_UPPER); - format(temp, (uptr)data, 16, uppercase); + format(temp, (U64)data, 16, uppercase); format_apply_width_alignment(self, temp.buffer, options); return self.buffer; } @@ -465,7 +493,7 @@ inline static String format(Formatter &self, const T &data) { format(self, "[{}] {{ ", count_of(data)); - for (u64 i = 0; i < count_of(data); ++i) + for (U64 i = 0; i < count_of(data); ++i) { if (i != 0) string_append(self.buffer, ", "); @@ -481,7 +509,7 @@ inline static String format(Formatter &self, const Array &data) { format(self, "[{}] {{ ", data.count); - for (u64 i = 0; i < data.count; ++i) + for (U64 i = 0; i < data.count; ++i) { if (i != 0) string_append(self.buffer, ", "); @@ -496,7 +524,7 @@ inline static String format(Formatter &self, const Hash_Table &data) { format(self, "[{}] {{ ", data.count); - u64 i = 0; + U64 i = 0; for (const auto &[key, value] : data) { if (i != 0) @@ -512,13 +540,13 @@ format(Formatter &self, const Hash_Table &data) struct Format_Field { - u32 index; + U32 index; Format_Options options; bool has_index; }; inline static Format_Field -parse_format_field(const String &fmt, u32 &i) +parse_format_field(const String &fmt, U32 &i) { Format_Field field = {}; field.options.specifier = FORMAT_SPECIFIER_NONE; @@ -535,8 +563,8 @@ parse_format_field(const String &fmt, u32 &i) char *end = nullptr; field.index = ::strtoul(&fmt[i + 1], &end, 
10); field.has_index = true; - i64 length = end - &fmt[i + 1]; - i += (u32)length + 1; + I64 length = end - &fmt[i + 1]; + i += (U32)length + 1; } else { @@ -580,8 +608,8 @@ parse_format_field(const String &fmt, u32 &i) { char *end = nullptr; field.options.width = ::strtoul(&fmt[i], &end, 10); - i64 length = end - &fmt[i]; - i += (u32)length; + I64 length = end - &fmt[i]; + i += (U32)length; } // Parse type specifier (optional) @@ -627,7 +655,7 @@ template inline static String format(Formatter &self, const String &fmt, TArgs &&...args) { - constexpr auto append_field_data = [](Formatter &self, const T &data, u32 &argument_index, u32 target_index, const Format_Options &options) { + constexpr auto append_field_data = [](Formatter &self, const T &data, U32 &argument_index, U32 target_index, const Format_Options &options) { if (argument_index == target_index) { if constexpr (std::is_same_v) @@ -638,10 +666,10 @@ format(Formatter &self, const String &fmt, TArgs &&...args) { Formatter temp = formatter_init(self.buffer.allocator); DEFER(formatter_deinit(temp)); - u64 count = count_of(data); + U64 count = count_of(data); if (count > 0 && data[count - 1] == '\0') --count; - for (u64 i = 0; i < count; ++i) + for (U64 i = 0; i < count; ++i) string_append(temp.buffer, data[i]); format_apply_width_alignment(self, temp.buffer, options); } @@ -688,10 +716,10 @@ format(Formatter &self, const String &fmt, TArgs &&...args) return string_literal(""); // Count arguments (excluding trailing allocator) - u32 argument_count = sizeof...(args); + U32 argument_count = sizeof...(args); if constexpr (sizeof...(args) > 0) { - [[maybe_unused]] u32 argument_index = 0; + [[maybe_unused]] U32 argument_index = 0; ([&]() { if (argument_index == argument_count - 1 && is_allocator(args)) --argument_count; @@ -699,11 +727,11 @@ format(Formatter &self, const String &fmt, TArgs &&...args) }(), ...); } - u32 auto_index = 0; + U32 auto_index = 0; bool uses_manual_indexing = false; bool uses_auto_indexing = 
false; - for (u32 i = 0; i < fmt.count; ++i) + for (U32 i = 0; i < fmt.count; ++i) { if (fmt[i] == '{') { @@ -721,7 +749,7 @@ format(Formatter &self, const String &fmt, TArgs &&...args) Format_Field field = parse_format_field(fmt, i); // Determine which index to use - u32 target_index; + U32 target_index; if (field.has_index) { uses_manual_indexing = true; @@ -737,7 +765,7 @@ format(Formatter &self, const String &fmt, TArgs &&...args) // Append the argument if constexpr (sizeof...(args) > 0) { - u32 index = 0; + U32 index = 0; (append_field_data(self, args, index, target_index, field.options), ...); } } @@ -770,7 +798,7 @@ template inline static String format(const String &fmt, TArgs &&...args) { - [[maybe_unused]] constexpr auto set_allocator = [](memory::Allocator *&allocator, const T &data, u32 &argument_index, u32 argument_count) { + [[maybe_unused]] constexpr auto set_allocator = [](memory::Allocator *&allocator, const T &data, U32 &argument_index, U32 argument_count) { if constexpr (std::is_base_of_v || std::is_same_v) if (argument_index == argument_count - 1) allocator = data; @@ -778,7 +806,7 @@ format(const String &fmt, TArgs &&...args) }; memory::Allocator *allocator = memory::heap_allocator(); - [[maybe_unused]] u32 argument_index = 0; + [[maybe_unused]] U32 argument_index = 0; (set_allocator(allocator, args, argument_index, sizeof...(args)), ...); Formatter self = formatter_init(allocator); @@ -801,4 +829,86 @@ to_string(const T &data, memory::Allocator *allocator = memory::heap_allocator() Formatter self = formatter_init(allocator); DEFER(self = Formatter{}); return format(self, "{}", data); +} + +// ============================================================================ +// Math type formatters. Placed after the variadic format() dispatchers so their +// call sites can resolve via ordinary lookup. See TODO at top — these ideally +// shouldn't live here long-term. 
+// ============================================================================ + +inline static String format(Formatter &self, const F32x2 &v) { return format(self, "{{{}, {}}}", v.x, v.y); } +inline static String format(Formatter &self, const F32x3 &v) { return format(self, "{{{}, {}, {}}}", v.x, v.y, v.z); } +inline static String format(Formatter &self, const F32x4 &v) { return format(self, "{{{}, {}, {}, {}}}", v.x, v.y, v.z, v.w); } + +inline static String format(Formatter &self, const F64x2 &v) { return format(self, "{{{}, {}}}", v.x, v.y); } +inline static String format(Formatter &self, const F64x3 &v) { return format(self, "{{{}, {}, {}}}", v.x, v.y, v.z); } +inline static String format(Formatter &self, const F64x4 &v) { return format(self, "{{{}, {}, {}, {}}}", v.x, v.y, v.z, v.w); } + +inline static String format(Formatter &self, const I32x2 &v) { return format(self, "{{{}, {}}}", v.x, v.y); } +inline static String format(Formatter &self, const I32x3 &v) { return format(self, "{{{}, {}, {}}}", v.x, v.y, v.z); } +inline static String format(Formatter &self, const I32x4 &v) { return format(self, "{{{}, {}, {}, {}}}", v.x, v.y, v.z, v.w); } + +inline static String format(Formatter &self, const U32x2 &v) { return format(self, "{{{}, {}}}", v.x, v.y); } +inline static String format(Formatter &self, const U32x3 &v) { return format(self, "{{{}, {}, {}}}", v.x, v.y, v.z); } +inline static String format(Formatter &self, const U32x4 &v) { return format(self, "{{{}, {}, {}, {}}}", v.x, v.y, v.z, v.w); } + +inline static String +format(Formatter &self, const F32x2x2 &M) +{ + return format(self, "[[{}, {}], [{}, {}]]", + M.m00, M.m01, + M.m10, M.m11); +} + +inline static String +format(Formatter &self, const F32x3x3 &M) +{ + return format(self, "[[{}, {}, {}], [{}, {}, {}], [{}, {}, {}]]", + M.m00, M.m01, M.m02, + M.m10, M.m11, M.m12, + M.m20, M.m21, M.m22); +} + +inline static String +format(Formatter &self, const F32x4x4 &M) +{ + return format(self, "[[{}, {}, {}, 
{}], [{}, {}, {}, {}], [{}, {}, {}, {}], [{}, {}, {}, {}]]", + M.m00, M.m01, M.m02, M.m03, + M.m10, M.m11, M.m12, M.m13, + M.m20, M.m21, M.m22, M.m23, + M.m30, M.m31, M.m32, M.m33); +} + +inline static String +format(Formatter &self, const F64x2x2 &M) +{ + return format(self, "[[{}, {}], [{}, {}]]", + M.m00, M.m01, + M.m10, M.m11); +} + +inline static String +format(Formatter &self, const F64x3x3 &M) +{ + return format(self, "[[{}, {}, {}], [{}, {}, {}], [{}, {}, {}]]", + M.m00, M.m01, M.m02, + M.m10, M.m11, M.m12, + M.m20, M.m21, M.m22); +} + +inline static String +format(Formatter &self, const F64x4x4 &M) +{ + return format(self, "[[{}, {}, {}, {}], [{}, {}, {}, {}], [{}, {}, {}, {}], [{}, {}, {}, {}]]", + M.m00, M.m01, M.m02, M.m03, + M.m10, M.m11, M.m12, M.m13, + M.m20, M.m21, M.m22, M.m23, + M.m30, M.m31, M.m32, M.m33); +} + +inline static String +format(Formatter &self, const Quaternion &q) +{ + return format(self, "{{w={}, x={}, y={}, z={}}}", q.w, q.x, q.y, q.z); } \ No newline at end of file diff --git a/core/hash.h b/core/hash.h index 2c19a67e..370c647f 100644 --- a/core/hash.h +++ b/core/hash.h @@ -3,14 +3,14 @@ #include "core/defines.h" // 32 bit Fowler-Noll-Vo hash. 
-inline static u64 -hash_fnv_x32(const void *key, u64 key_length) +inline static U64 +hash_fnv_x32(const void *key, U64 key_length) { - const u32 p = 16777619U; - u32 hash = 2166136261U; + const U32 p = 16777619U; + U32 hash = 2166136261U; - const u8 *data = (const u8 *)key; - for (u64 i = 0; i < key_length; ++i) + const U8 *data = (const U8 *)key; + for (U64 i = 0; i < key_length; ++i) hash = (hash ^ data[i]) * p; hash += hash << 13; @@ -22,7 +22,7 @@ hash_fnv_x32(const void *key, u64 key_length) } template -inline static u64 +inline static U64 hash(const T &) { static_assert(sizeof(T) == 0, "There is no 'u64 hash(const T &)' function overload defined for this type."); @@ -30,87 +30,87 @@ hash(const T &) } template -inline static u64 +inline static U64 hash(T *key) { - return u64(key); + return U64(key); }; template -inline static u64 +inline static U64 hash(const T *key) { - return u64(key); + return U64(key); }; -inline static u64 +inline static U64 hash(bool key) { - return u64(key); + return U64(key); } -inline static u64 +inline static U64 hash(char key) { - return u64(key); + return U64(key); } -inline static u64 -hash(i8 key) +inline static U64 +hash(I8 key) { - return u64(key); + return U64(key); } -inline static u64 -hash(i16 key) +inline static U64 +hash(I16 key) { - return u64(key); + return U64(key); } -inline static u64 -hash(i32 key) +inline static U64 +hash(I32 key) { - return u64(key); + return U64(key); } -inline static u64 -hash(i64 key) +inline static U64 +hash(I64 key) { - return u64(key); + return U64(key); } -inline static u64 -hash(u8 key) +inline static U64 +hash(U8 key) { - return u64(key); + return U64(key); } -inline static u64 -hash(u16 key) +inline static U64 +hash(U16 key) { - return u64(key); + return U64(key); } -inline static u64 -hash(u32 key) +inline static U64 +hash(U32 key) { - return u64(key); + return U64(key); } -inline static u64 -hash(u64 key) +inline static U64 +hash(U64 key) { - return u64(key); + return U64(key); } 
-inline static u64 -hash(f32 key) +inline static U64 +hash(F32 key) { - return hash_fnv_x32(&key, sizeof(f32)); + return hash_fnv_x32(&key, sizeof(F32)); } -inline static u64 -hash(f64 key) +inline static U64 +hash(F64 key) { - return hash_fnv_x32(&key, sizeof(f64)); + return hash_fnv_x32(&key, sizeof(F64)); } \ No newline at end of file diff --git a/core/json.cpp b/core/json.cpp index 0b689836..67f9a65c 100644 --- a/core/json.cpp +++ b/core/json.cpp @@ -15,8 +15,8 @@ struct JSON_Parser { memory::Allocator *allocator; const char *iterator; - u32 line_number; - u32 column_number; + U32 line_number; + U32 column_number; Error error; }; @@ -187,7 +187,7 @@ _json_parser_parse_number(JSON_Parser &self) } char *end = nullptr; - f64 number = ::strtod(self.iterator, &end); + F64 number = ::strtod(self.iterator, &end); if (errno == ERANGE) { self.error = Error{ @@ -198,7 +198,7 @@ _json_parser_parse_number(JSON_Parser &self) }; return JSON_Value{}; } - self.column_number += u32(end - self.iterator); + self.column_number += U32(end - self.iterator); self.iterator = end; JSON_Value value = {}; @@ -363,13 +363,13 @@ _json_parser_parse_value(JSON_Parser &self) } inline static void -_json_value_object_to_string(const JSON_Value &self, String &json_string, i32 indent_level); +_json_value_object_to_string(const JSON_Value &self, String &json_string, I32 indent_level); inline static void -_json_value_array_to_string(const JSON_Value &self, String &json_string, i32 indent_level = 0) +_json_value_array_to_string(const JSON_Value &self, String &json_string, I32 indent_level = 0) { string_append(json_string, "[\n"); - for (u64 i = 0; i < self.as_array.count; ++i) + for (U64 i = 0; i < self.as_array.count; ++i) { if (i > 0) string_append(json_string, ",\n"); @@ -407,10 +407,10 @@ _json_value_array_to_string(const JSON_Value &self, String &json_string, i32 ind } inline static void -_json_value_object_to_string(const JSON_Value &self, String &json_string, i32 indent_level = 0) 
+_json_value_object_to_string(const JSON_Value &self, String &json_string, I32 indent_level = 0) { string_append(json_string, "{\n"); - i32 i = 0; + I32 i = 0; for (const auto &[key, value] : self.as_object) { if (i > 0) @@ -460,7 +460,7 @@ json_value_init_as_bool(bool value) } JSON_Value -json_value_init_as_number(f64 value) +json_value_init_as_number(F64 value) { return JSON_Value { .kind = JSON_VALUE_KIND_NUMBER, @@ -526,8 +526,8 @@ json_value_from_file(const char *filepath, memory::Allocator *allocator) }; } - auto file_data = memory::allocate(allocator, file_size); - auto bytes_read = platform_file_read(filepath, Platform_Memory{(u8 *)file_data, file_size}); + auto file_data = memory::allocate(allocator, file_size); + auto bytes_read = platform_file_read(filepath, Platform_Memory{(U8 *)file_data, file_size}); if (bytes_read != file_size) { memory::deallocate(allocator, file_data); @@ -627,7 +627,7 @@ json_value_get_as_bool(const JSON_Value &self) return self.as_bool; } -f64 +F64 json_value_get_as_number(const JSON_Value &self) { validate(self.kind == JSON_VALUE_KIND_NUMBER, "[JSON]: Expected JSON_VALUE_KIND_NUMBER."); @@ -673,7 +673,7 @@ json_value_to_file(const JSON_Value &self, const char *filepath) if (error) return error; - auto file_size = platform_file_write(filepath, Platform_Memory{(u8 *)json_string.data, json_string.count}); + auto file_size = platform_file_write(filepath, Platform_Memory{(U8 *)json_string.data, json_string.count}); if (file_size != json_string.count) return Error{"[JSON]: Could not write file '{}'.", filepath}; return {}; diff --git a/core/json.h b/core/json.h index 3a2bef67..d8e0ebb8 100644 --- a/core/json.h +++ b/core/json.h @@ -8,7 +8,7 @@ #include "core/containers/string.h" #include "core/containers/hash_table.h" -enum JSON_VALUE_KIND : u8 +enum JSON_VALUE_KIND : U8 { JSON_VALUE_KIND_INVALID, JSON_VALUE_KIND_NULL, @@ -25,7 +25,7 @@ struct JSON_Value union { bool as_bool; - f64 as_number; + F64 as_number; String as_string; Array 
as_array; Hash_Table as_object; @@ -42,7 +42,7 @@ CORE_API JSON_Value json_value_init_as_bool(bool value = false); CORE_API JSON_Value -json_value_init_as_number(f64 value = 0.0f); +json_value_init_as_number(F64 value = 0.0f); CORE_API JSON_Value json_value_init_as_string(memory::Allocator *allocator = memory::heap_allocator()); @@ -98,7 +98,7 @@ json_value_object_find(const JSON_Value &self, const char *name) CORE_API bool json_value_get_as_bool(const JSON_Value &self); -CORE_API f64 +CORE_API F64 json_value_get_as_number(const JSON_Value &self); CORE_API String diff --git a/core/math/f32.h b/core/math/f32.h new file mode 100644 index 00000000..188b26d6 --- /dev/null +++ b/core/math/f32.h @@ -0,0 +1,273 @@ +#pragma once + +#include + +#include +#include + +// ============================================================================ +// F32 scalar helpers + constants. +// +// All angle-accepting functions take radians. Use F32_TO_RADIANS / F32_TO_DEGREES +// for conversion. +// ============================================================================ + +// ---- Angular constants ----------------------------------------------------- + +static constexpr F32 F32_PI = 3.14159265358979323846f; +static constexpr F32 F32_TAU = F32_PI * 2.0f; +static constexpr F32 F32_PI_OVER_2 = F32_PI * 0.5f; +static constexpr F32 F32_TO_DEGREES = 360.0f / F32_TAU; +static constexpr F32 F32_TO_RADIANS = F32_TAU / 360.0f; + +// ---- Numeric limits / special values --------------------------------------- +// F32_MIN / F32_MAX live in core/defines.h alongside the primitive aliases. 
+ +static constexpr F32 F32_EPSILON = std::numeric_limits::epsilon(); +static constexpr F32 F32_INFINITY = std::numeric_limits::infinity(); +static constexpr F32 F32_NEG_INFINITY = -F32_INFINITY; +static constexpr F32 F32_NAN = std::numeric_limits::quiet_NaN(); + +// ---- Transcendental / trigonometric wrappers ------------------------------- + +inline static F32 +f32_sqrt(F32 x) +{ + return ::sqrtf(x); +} + +inline static F32 +f32_sin(F32 x) +{ + return ::sinf(x); +} + +inline static F32 +f32_asin(F32 x) +{ + return ::asinf(x); +} + +inline static F32 +f32_cos(F32 x) +{ + return ::cosf(x); +} + +inline static F32 +f32_acos(F32 x) +{ + return ::acosf(x); +} + +inline static F32 +f32_tan(F32 x) +{ + return ::tanf(x); +} + +inline static F32 +f32_atan2(F32 y, F32 x) +{ + return ::atan2f(y, x); +} + +inline static F32 +f32_power(F32 base, F32 exponent) +{ + return ::powf(base, exponent); +} + +inline static F32 +f32_modulo(F32 x, F32 divisor) +{ + return ::fmodf(x, divisor); +} + +// ---- Basic arithmetic helpers ---------------------------------------------- + +inline static F32 +f32_abs(F32 x) +{ + return x < 0.0f ? -x : x; +} + +inline static F32 +f32_sign(F32 x) +{ + if (x > 0.0f) return 1.0f; + if (x < 0.0f) return -1.0f; + return 0.0f; +} + +inline static F32 +f32_min(F32 a, F32 b) +{ + return a < b ? a : b; +} + +inline static F32 +f32_max(F32 a, F32 b) +{ + return a > b ? 
a : b; +} + +inline static F32 +f32_clamp(F32 x, F32 lo, F32 hi) +{ + if (x < lo) return lo; + if (x > hi) return hi; + return x; +} + +inline static F32 +f32_lerp(F32 a, F32 b, F32 t) +{ + return a + t * (b - a); +} + +// ---- Special-value tests --------------------------------------------------- + +inline static bool +f32_is_nan(F32 x) +{ + return x != x; +} + +inline static bool +f32_is_infinite(F32 x) +{ + return x == F32_INFINITY || x == F32_NEG_INFINITY; +} + +inline static bool +f32_is_finite(F32 x) +{ + return !f32_is_nan(x) && !f32_is_infinite(x); +} + +// Absolute-tolerance comparison. Caller specifies epsilon — no magic default, +// since the right tolerance depends on the magnitude of the values being compared. +// For "approximately equal in relative terms" use `f32_abs(a - b) <= epsilon * f32_max(f32_abs(a), f32_abs(b))`. +inline static bool +f32_approx_equal(F32 a, F32 b, F32 epsilon) +{ + return f32_abs(a - b) <= epsilon; +} + +// ---- Interpolation beyond lerp --------------------------------------------- + +// Hermite 3t² - 2t³ smoothing. Input is remapped to [0,1] over [edge0, edge1] then smoothed. +inline static F32 +f32_smoothstep(F32 edge0, F32 edge1, F32 x) +{ + F32 t = f32_clamp((x - edge0) / (edge1 - edge0), 0.0f, 1.0f); + return t * t * (3.0f - 2.0f * t); +} + +// Perlin's improved smoothstep: 6t⁵ - 15t⁴ + 10t³ (C² continuous). +inline static F32 +f32_smootherstep(F32 edge0, F32 edge1, F32 x) +{ + F32 t = f32_clamp((x - edge0) / (edge1 - edge0), 0.0f, 1.0f); + return t * t * t * (t * (t * 6.0f - 15.0f) + 10.0f); +} + +// Critically-damped spring smoothing. Returns the new current value and mutates +// `velocity` in place. Standard Unity-style signature. Pass the current and target +// values each frame and a `smooth_time` measured in seconds; the function settles +// `current` toward `target` over roughly `smooth_time`. 
+inline static F32 +f32_smooth_damp(F32 current, F32 target, F32 *velocity, F32 smooth_time, F32 dt) +{ + smooth_time = f32_max(smooth_time, 0.0001f); + F32 omega = 2.0f / smooth_time; + F32 x = omega * dt; + F32 exp_ = 1.0f / (1.0f + x + 0.48f * x * x + 0.235f * x * x * x); + F32 delta = current - target; + F32 temp = (*velocity + omega * delta) * dt; + *velocity = (*velocity - omega * temp) * exp_; + return target + (delta + temp) * exp_; +} + +// ---- Easing curves --------------------------------------------------------- +// Input t in [0, 1] (clamped). Output in [0, 1]. `ease_in_out` variants are +// symmetric (ease in over first half, ease out over second). + +inline static F32 +f32_ease_in_quad(F32 t) +{ + t = f32_clamp(t, 0.0f, 1.0f); + return t * t; +} + +inline static F32 +f32_ease_out_quad(F32 t) +{ + t = f32_clamp(t, 0.0f, 1.0f); + return 1.0f - (1.0f - t) * (1.0f - t); +} + +inline static F32 +f32_ease_in_out_quad(F32 t) +{ + t = f32_clamp(t, 0.0f, 1.0f); + return t < 0.5f ? 
2.0f * t * t : 1.0f - 2.0f * (1.0f - t) * (1.0f - t); +} + +inline static F32 +f32_ease_in_cubic(F32 t) +{ + t = f32_clamp(t, 0.0f, 1.0f); + return t * t * t; +} + +inline static F32 +f32_ease_out_cubic(F32 t) +{ + t = f32_clamp(t, 0.0f, 1.0f); + F32 one_minus = 1.0f - t; + return 1.0f - one_minus * one_minus * one_minus; +} + +inline static F32 +f32_ease_in_out_cubic(F32 t) +{ + t = f32_clamp(t, 0.0f, 1.0f); + if (t < 0.5f) + return 4.0f * t * t * t; + F32 one_minus = 1.0f - t; + return 1.0f - 4.0f * one_minus * one_minus * one_minus; +} + +inline static F32 +f32_ease_in_elastic(F32 t) +{ + t = f32_clamp(t, 0.0f, 1.0f); + if (t == 0.0f) return 0.0f; + if (t == 1.0f) return 1.0f; + constexpr F32 c4 = F32_TAU / 3.0f; + return -f32_power(2.0f, 10.0f * t - 10.0f) * f32_sin((t * 10.0f - 10.75f) * c4); +} + +inline static F32 +f32_ease_out_elastic(F32 t) +{ + t = f32_clamp(t, 0.0f, 1.0f); + if (t == 0.0f) return 0.0f; + if (t == 1.0f) return 1.0f; + constexpr F32 c4 = F32_TAU / 3.0f; + return f32_power(2.0f, -10.0f * t) * f32_sin((t * 10.0f - 0.75f) * c4) + 1.0f; +} + +inline static F32 +f32_ease_in_out_elastic(F32 t) +{ + t = f32_clamp(t, 0.0f, 1.0f); + if (t == 0.0f) return 0.0f; + if (t == 1.0f) return 1.0f; + constexpr F32 c5 = F32_TAU / 4.5f; + if (t < 0.5f) + return -(f32_power(2.0f, 20.0f * t - 10.0f) * f32_sin((20.0f * t - 11.125f) * c5)) * 0.5f; + return (f32_power(2.0f, -20.0f * t + 10.0f) * f32_sin((20.0f * t - 11.125f) * c5)) * 0.5f + 1.0f; +} diff --git a/core/math/f32x2.h b/core/math/f32x2.h new file mode 100644 index 00000000..2df9912f --- /dev/null +++ b/core/math/f32x2.h @@ -0,0 +1,160 @@ +#pragma once + +#include +#include + +// ============================================================================ +// F32x2 — 2D F32 vector. Scalar (SIMD would be overkill for 2 lanes). 
+// ============================================================================ + +struct F32x2 +{ + F32 x, y; +}; + +// ---- Operators ------------------------------------------------------------- + +inline static F32x2 +operator+(const F32x2 &a, const F32x2 &b) +{ + return F32x2{a.x + b.x, a.y + b.y}; +} + +inline static F32x2 & +operator+=(F32x2 &a, const F32x2 &b) +{ + a = a + b; + return a; +} + +inline static F32x2 +operator-(const F32x2 &a) +{ + return F32x2{-a.x, -a.y}; +} + +inline static F32x2 +operator-(const F32x2 &a, const F32x2 &b) +{ + return F32x2{a.x - b.x, a.y - b.y}; +} + +inline static F32x2 & +operator-=(F32x2 &a, const F32x2 &b) +{ + a = a - b; + return a; +} + +inline static F32x2 +operator*(const F32x2 &a, F32 s) +{ + return F32x2{a.x * s, a.y * s}; +} + +inline static F32x2 +operator*(F32 s, const F32x2 &a) +{ + return a * s; +} + +inline static F32x2 & +operator*=(F32x2 &a, F32 s) +{ + a = a * s; + return a; +} + +inline static F32x2 +operator/(const F32x2 &a, F32 s) +{ + return a * (1.0f / s); +} + +inline static F32x2 & +operator/=(F32x2 &a, F32 s) +{ + a = a / s; + return a; +} + +inline static bool +operator==(const F32x2 &a, const F32x2 &b) +{ + return a.x == b.x && a.y == b.y; +} + +// ---- Free functions -------------------------------------------------------- + +inline static F32x2 +f32x2_from_f32(F32 s) +{ + return F32x2{s, s}; +} + +inline static F32 +f32x2_dot(const F32x2 &a, const F32x2 &b) +{ + return a.x * b.x + a.y * b.y; +} + +// 2D "cross product" — scalar z-component of the 3D cross. Useful for orientation tests. 
+inline static F32 +f32x2_cross(const F32x2 &a, const F32x2 &b) +{ + return a.x * b.y - a.y * b.x; +} + +inline static F32 +f32x2_length_squared(const F32x2 &a) +{ + return a.x * a.x + a.y * a.y; +} + +inline static F32 +f32x2_length(const F32x2 &a) +{ + return f32_sqrt(f32x2_length_squared(a)); +} + +inline static F32x2 +f32x2_normalize(const F32x2 &a) +{ + return a / f32x2_length(a); +} + +inline static F32x2 +f32x2_min(const F32x2 &a, const F32x2 &b) +{ + return F32x2{f32_min(a.x, b.x), f32_min(a.y, b.y)}; +} + +inline static F32x2 +f32x2_max(const F32x2 &a, const F32x2 &b) +{ + return F32x2{f32_max(a.x, b.x), f32_max(a.y, b.y)}; +} + +inline static F32x2 +f32x2_lerp(const F32x2 &a, const F32x2 &b, F32 t) +{ + return F32x2{f32_lerp(a.x, b.x, t), f32_lerp(a.y, b.y, t)}; +} + +inline static F32x2 +f32x2_clamp(const F32x2 &v, const F32x2 &lo, const F32x2 &hi) +{ + return f32x2_min(f32x2_max(v, lo), hi); +} + +inline static bool +f32x2_approx_equal(const F32x2 &a, const F32x2 &b, F32 epsilon) +{ + return f32_approx_equal(a.x, b.x, epsilon) + && f32_approx_equal(a.y, b.y, epsilon); +} + +// ---- Constants ------------------------------------------------------------- + +static constexpr F32x2 F32X2_ZERO = {0.0f, 0.0f}; +static constexpr F32x2 F32X2_ONE = {1.0f, 1.0f}; diff --git a/core/math/f32x2x2.h b/core/math/f32x2x2.h new file mode 100644 index 00000000..a56b88be --- /dev/null +++ b/core/math/f32x2x2.h @@ -0,0 +1,105 @@ +#pragma once + +#include +#include +#include + +// ============================================================================ +// F32x2x2 — 2x2 F32 matrix, row-major, scalar (2x2 is not worth SIMD). +// Memory layout: [m00 m01 | m10 m11], 16 bytes. 
+// ============================================================================ + +struct F32x2x2 +{ + F32 m00, m01; + F32 m10, m11; +}; + +inline static F32x2x2 +f32x2x2_identity() +{ + return F32x2x2{1.0f, 0.0f, 0.0f, 1.0f}; +} + +// ---- Operators ------------------------------------------------------------- + +inline static F32x2x2 +operator+(const F32x2x2 &A, const F32x2x2 &B) +{ + return F32x2x2{A.m00 + B.m00, A.m01 + B.m01, A.m10 + B.m10, A.m11 + B.m11}; +} + +inline static F32x2x2 +operator-(const F32x2x2 &M) +{ + return F32x2x2{-M.m00, -M.m01, -M.m10, -M.m11}; +} + +inline static F32x2x2 +operator-(const F32x2x2 &A, const F32x2x2 &B) +{ + return F32x2x2{A.m00 - B.m00, A.m01 - B.m01, A.m10 - B.m10, A.m11 - B.m11}; +} + +inline static F32x2x2 +operator*(const F32x2x2 &M, F32 s) +{ + return F32x2x2{M.m00 * s, M.m01 * s, M.m10 * s, M.m11 * s}; +} + +inline static F32x2x2 +operator*(F32 s, const F32x2x2 &M) { return M * s; } + +inline static F32x2x2 +operator/(const F32x2x2 &M, F32 s) { return M * (1.0f / s); } + +inline static bool +operator==(const F32x2x2 &A, const F32x2x2 &B) +{ + return A.m00 == B.m00 && A.m01 == B.m01 && A.m10 == B.m10 && A.m11 == B.m11; +} + +// v * M (row-vector convention). +inline static F32x2 +operator*(const F32x2 &v, const F32x2x2 &M) +{ + return F32x2{ + v.x * M.m00 + v.y * M.m10, + v.x * M.m01 + v.y * M.m11 + }; +} + +// A * B = row i of result is A.row[i] * B. 
+inline static F32x2x2 +operator*(const F32x2x2 &A, const F32x2x2 &B) +{ + return F32x2x2{ + A.m00 * B.m00 + A.m01 * B.m10, A.m00 * B.m01 + A.m01 * B.m11, + A.m10 * B.m00 + A.m11 * B.m10, A.m10 * B.m01 + A.m11 * B.m11 + }; +} + +inline static F32x2x2 +f32x2x2_transpose(const F32x2x2 &M) +{ + return F32x2x2{M.m00, M.m10, M.m01, M.m11}; +} + +inline static F32 +f32x2x2_determinant(const F32x2x2 &M) { return M.m00 * M.m11 - M.m01 * M.m10; } + +inline static bool +f32x2x2_is_invertible(const F32x2x2 &M) { return f32x2x2_determinant(M) != 0.0f; } + +inline static F32x2x2 +f32x2x2_inverse(const F32x2x2 &M) +{ + F32 d = f32x2x2_determinant(M); + if (d == 0.0f) + return F32x2x2{}; + F32 inv_d = 1.0f / d; + return F32x2x2{ + M.m11 * inv_d, -M.m01 * inv_d, + -M.m10 * inv_d, M.m00 * inv_d + }; +} diff --git a/core/math/f32x3.h b/core/math/f32x3.h new file mode 100644 index 00000000..509aff1c --- /dev/null +++ b/core/math/f32x3.h @@ -0,0 +1,172 @@ +#pragma once + +#include +#include + +// ============================================================================ +// F32x3 — 3D F32 vector. Packed 12 bytes (no alignas) for GPU-attribute interop. 
// ============================================================================

// 3D vector of three tightly packed F32s (12 bytes, no SIMD lane — see the
// file banner above: this keeps it directly usable as a GPU vertex attribute).
struct F32x3
{
	F32 x, y, z;
};

// ---- Operators -------------------------------------------------------------

// Component-wise sum.
inline static F32x3
operator+(const F32x3 &a, const F32x3 &b)
{
	return F32x3{a.x + b.x, a.y + b.y, a.z + b.z};
}

inline static F32x3 &
operator+=(F32x3 &a, const F32x3 &b)
{
	a = a + b;
	return a;
}

// Component-wise negation.
inline static F32x3
operator-(const F32x3 &a)
{
	return F32x3{-a.x, -a.y, -a.z};
}

// Component-wise difference.
inline static F32x3
operator-(const F32x3 &a, const F32x3 &b)
{
	return F32x3{a.x - b.x, a.y - b.y, a.z - b.z};
}

inline static F32x3 &
operator-=(F32x3 &a, const F32x3 &b)
{
	a = a - b;
	return a;
}

// Uniform scale of every component.
inline static F32x3
operator*(const F32x3 &a, F32 s)
{
	return F32x3{a.x * s, a.y * s, a.z * s};
}

inline static F32x3
operator*(F32 s, const F32x3 &a)
{
	return a * s;
}

inline static F32x3 &
operator*=(F32x3 &a, F32 s)
{
	a = a * s;
	return a;
}

// Division by a scalar via multiplication by the reciprocal (one divide
// instead of three).
inline static F32x3
operator/(const F32x3 &a, F32 s)
{
	return a * (1.0f / s);
}

inline static F32x3 &
operator/=(F32x3 &a, F32 s)
{
	a = a / s;
	return a;
}

// Exact component equality — no epsilon; use f32x3_approx_equal for tolerant
// comparison.
inline static bool
operator==(const F32x3 &a, const F32x3 &b)
{
	return a.x == b.x && a.y == b.y && a.z == b.z;
}

// ---- Free functions --------------------------------------------------------

// Splat: broadcasts one scalar into all three components.
inline static F32x3
f32x3_from_f32(F32 s)
{
	return F32x3{s, s, s};
}

inline static F32
f32x3_dot(const F32x3 &a, const F32x3 &b)
{
	return a.x * b.x + a.y * b.y + a.z * b.z;
}

// Standard cross product: returns a vector orthogonal to both a and b, with
// magnitude |a||b|sin(theta).
inline static F32x3
f32x3_cross(const F32x3 &a, const F32x3 &b)
{
	return F32x3{
		a.y * b.z - a.z * b.y,
		a.z * b.x - a.x * b.z,
		a.x * b.y - a.y * b.x
	};
}

// Squared length — avoids the sqrt when only relative magnitudes matter.
inline static F32
f32x3_length_squared(const F32x3 &a)
{
	return f32x3_dot(a, a);
}

inline static F32
f32x3_length(const F32x3 &a)
{
	return f32_sqrt(f32x3_length_squared(a));
}

// No zero-length guard: normalizing a zero vector divides by zero and yields
// non-finite components. Caller must ensure |a| > 0.
inline static F32x3
f32x3_normalize(const F32x3 &a)
{
	return a /
f32x3_length(a); +} + +inline static F32x3 +f32x3_min(const F32x3 &a, const F32x3 &b) +{ + return F32x3{f32_min(a.x, b.x), f32_min(a.y, b.y), f32_min(a.z, b.z)}; +} + +inline static F32x3 +f32x3_max(const F32x3 &a, const F32x3 &b) +{ + return F32x3{f32_max(a.x, b.x), f32_max(a.y, b.y), f32_max(a.z, b.z)}; +} + +inline static F32x3 +f32x3_lerp(const F32x3 &a, const F32x3 &b, F32 t) +{ + return F32x3{f32_lerp(a.x, b.x, t), f32_lerp(a.y, b.y, t), f32_lerp(a.z, b.z, t)}; +} + +inline static F32x3 +f32x3_clamp(const F32x3 &v, const F32x3 &lo, const F32x3 &hi) +{ + return f32x3_min(f32x3_max(v, lo), hi); +} + +inline static bool +f32x3_approx_equal(const F32x3 &a, const F32x3 &b, F32 epsilon) +{ + return f32_approx_equal(a.x, b.x, epsilon) + && f32_approx_equal(a.y, b.y, epsilon) + && f32_approx_equal(a.z, b.z, epsilon); +} + +// ---- Canonical-convention constants ---------------------------------------- +// Right-handed, Y-up, +Z toward the viewer. See docs/math.md for the full +// coordinate convention. + +static constexpr F32x3 F32X3_ZERO = { 0.0f, 0.0f, 0.0f}; +static constexpr F32x3 F32X3_ONE = { 1.0f, 1.0f, 1.0f}; +static constexpr F32x3 F32X3_RIGHT = { 1.0f, 0.0f, 0.0f}; +static constexpr F32x3 F32X3_LEFT = {-1.0f, 0.0f, 0.0f}; +static constexpr F32x3 F32X3_UP = { 0.0f, 1.0f, 0.0f}; +static constexpr F32x3 F32X3_DOWN = { 0.0f, -1.0f, 0.0f}; +static constexpr F32x3 F32X3_FORWARD = { 0.0f, 0.0f, -1.0f}; +static constexpr F32x3 F32X3_BACKWARD = { 0.0f, 0.0f, 1.0f}; diff --git a/core/math/f32x3x3.h b/core/math/f32x3x3.h new file mode 100644 index 00000000..1553f2dd --- /dev/null +++ b/core/math/f32x3x3.h @@ -0,0 +1,172 @@ +#pragma once + +#include +#include +#include +#include // For Simd_F32x4 row storage. + +// ============================================================================ +// F32x3x3 — 3x3 F32 matrix, row-major, SIMD-backed. +// +// Storage: 3 padded 4-wide rows (48 bytes total). 
This matches std140 / MSL +// matrix_float3x3 layout exactly — CPU→GPU upload is a direct memcpy. Net cost: +// 12 bytes of padding per matrix. +// ============================================================================ + +struct alignas(16) F32x3x3 +{ + union + { + struct + { + F32 m00, m01, m02, _pad0; + F32 m10, m11, m12, _pad1; + F32 m20, m21, m22, _pad2; + }; + F32x4 rows[3]; + }; +}; + +inline static F32x3x3 +f32x3x3_identity() +{ + return F32x3x3{ + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f + }; +} + +// ---- Element-wise operators ------------------------------------------------ + +inline static F32x3x3 +operator+(const F32x3x3 &A, const F32x3x3 &B) +{ + F32x3x3 R; + R.rows[0] = A.rows[0] + B.rows[0]; + R.rows[1] = A.rows[1] + B.rows[1]; + R.rows[2] = A.rows[2] + B.rows[2]; + return R; +} + +inline static F32x3x3 +operator-(const F32x3x3 &M) +{ + F32x3x3 R; + R.rows[0] = -M.rows[0]; + R.rows[1] = -M.rows[1]; + R.rows[2] = -M.rows[2]; + return R; +} + +inline static F32x3x3 +operator-(const F32x3x3 &A, const F32x3x3 &B) +{ + F32x3x3 R; + R.rows[0] = A.rows[0] - B.rows[0]; + R.rows[1] = A.rows[1] - B.rows[1]; + R.rows[2] = A.rows[2] - B.rows[2]; + return R; +} + +inline static F32x3x3 +operator*(const F32x3x3 &M, F32 s) +{ + F32x3x3 R; + R.rows[0] = M.rows[0] * s; + R.rows[1] = M.rows[1] * s; + R.rows[2] = M.rows[2] * s; + return R; +} + +inline static F32x3x3 +operator*(F32 s, const F32x3x3 &M) { return M * s; } + +inline static F32x3x3 +operator/(const F32x3x3 &M, F32 s) { return M * (1.0f / s); } + +inline static bool +operator==(const F32x3x3 &A, const F32x3x3 &B) +{ + return A.m00 == B.m00 && A.m01 == B.m01 && A.m02 == B.m02 + && A.m10 == B.m10 && A.m11 == B.m11 && A.m12 == B.m12 + && A.m20 == B.m20 && A.m21 == B.m21 && A.m22 == B.m22; +} + +// ---- Vec-mat / mat-mat multiply (row-vector convention) -------------------- + +inline static F32x3 +operator*(const F32x3 &v, const F32x3x3 &M) +{ + return F32x3{ + v.x * 
M.m00 + v.y * M.m10 + v.z * M.m20, + v.x * M.m01 + v.y * M.m11 + v.z * M.m21, + v.x * M.m02 + v.y * M.m12 + v.z * M.m22 + }; +} + +inline static F32x3x3 +operator*(const F32x3x3 &A, const F32x3x3 &B) +{ + F32x3x3 R; + + R.m00 = A.m00 * B.m00 + A.m01 * B.m10 + A.m02 * B.m20; + R.m01 = A.m00 * B.m01 + A.m01 * B.m11 + A.m02 * B.m21; + R.m02 = A.m00 * B.m02 + A.m01 * B.m12 + A.m02 * B.m22; + R._pad0 = 0.0f; + + R.m10 = A.m10 * B.m00 + A.m11 * B.m10 + A.m12 * B.m20; + R.m11 = A.m10 * B.m01 + A.m11 * B.m11 + A.m12 * B.m21; + R.m12 = A.m10 * B.m02 + A.m11 * B.m12 + A.m12 * B.m22; + R._pad1 = 0.0f; + + R.m20 = A.m20 * B.m00 + A.m21 * B.m10 + A.m22 * B.m20; + R.m21 = A.m20 * B.m01 + A.m21 * B.m11 + A.m22 * B.m21; + R.m22 = A.m20 * B.m02 + A.m21 * B.m12 + A.m22 * B.m22; + R._pad2 = 0.0f; + + return R; +} + +inline static F32x3x3 +f32x3x3_transpose(const F32x3x3 &M) +{ + return F32x3x3{ + M.m00, M.m10, M.m20, 0.0f, + M.m01, M.m11, M.m21, 0.0f, + M.m02, M.m12, M.m22, 0.0f + }; +} + +inline static F32 +f32x3x3_determinant(const F32x3x3 &M) +{ + return M.m00 * (M.m11 * M.m22 - M.m12 * M.m21) + - M.m01 * (M.m10 * M.m22 - M.m12 * M.m20) + + M.m02 * (M.m10 * M.m21 - M.m11 * M.m20); +} + +inline static bool +f32x3x3_is_invertible(const F32x3x3 &M) { return f32x3x3_determinant(M) != 0.0f; } + +inline static F32x3x3 +f32x3x3_inverse(const F32x3x3 &M) +{ + F32 d = f32x3x3_determinant(M); + if (d == 0.0f) + return F32x3x3{}; + F32 inv_d = 1.0f / d; + return F32x3x3{ + (M.m11 * M.m22 - M.m12 * M.m21) * inv_d, -(M.m01 * M.m22 - M.m02 * M.m21) * inv_d, (M.m01 * M.m12 - M.m02 * M.m11) * inv_d, 0.0f, + -(M.m10 * M.m22 - M.m12 * M.m20) * inv_d, (M.m00 * M.m22 - M.m02 * M.m20) * inv_d, -(M.m00 * M.m12 - M.m02 * M.m10) * inv_d, 0.0f, + (M.m10 * M.m21 - M.m11 * M.m20) * inv_d, -(M.m00 * M.m21 - M.m01 * M.m20) * inv_d, (M.m00 * M.m11 - M.m01 * M.m10) * inv_d, 0.0f + }; +} + +inline static bool +f32x3x3_approx_equal(const F32x3x3 &A, const F32x3x3 &B, F32 epsilon) +{ + return 
f32_approx_equal(A.m00, B.m00, epsilon) && f32_approx_equal(A.m01, B.m01, epsilon) && f32_approx_equal(A.m02, B.m02, epsilon) + && f32_approx_equal(A.m10, B.m10, epsilon) && f32_approx_equal(A.m11, B.m11, epsilon) && f32_approx_equal(A.m12, B.m12, epsilon) + && f32_approx_equal(A.m20, B.m20, epsilon) && f32_approx_equal(A.m21, B.m21, epsilon) && f32_approx_equal(A.m22, B.m22, epsilon); +} diff --git a/core/math/f32x4.h b/core/math/f32x4.h new file mode 100644 index 00000000..26927286 --- /dev/null +++ b/core/math/f32x4.h @@ -0,0 +1,245 @@ +#pragma once + +#include +#include + +// ============================================================================ +// F32x4 — 4D F32 vector, SIMD-backed. +// +// Storage: alignas(16). Union with scalar fields (x, y, z, w) and the SIMD lane +// register. Field access is unchanged from scalar code; hot paths read/write +// `simd` directly. +// +// Arch gates are set by CMake (see core/CMakeLists.txt). The scalar fallback +// is always compiled (for SIMD_FORCE_SCALAR parity testing). 
+// ============================================================================ + +#if defined(SIMD_FORCE_SCALAR) + struct Simd_F32x4 { F32 v[4]; }; +#elif defined(SIMD_NEON) + #include + typedef float32x4_t Simd_F32x4; +#elif defined(SIMD_AVX) + #include + typedef __m128 Simd_F32x4; +#else + struct Simd_F32x4 { F32 v[4]; }; +#endif + +struct alignas(16) F32x4 +{ + union + { + struct { F32 x, y, z, w; }; + Simd_F32x4 simd; + }; +}; + +// ---- Operators ------------------------------------------------------------- + +inline static F32x4 +operator+(const F32x4 &a, const F32x4 &b) +{ + F32x4 r; +#if defined(SIMD_NEON) + r.simd = vaddq_f32(a.simd, b.simd); +#elif defined(SIMD_AVX) + r.simd = _mm_add_ps(a.simd, b.simd); +#else + r.x = a.x + b.x; r.y = a.y + b.y; r.z = a.z + b.z; r.w = a.w + b.w; +#endif + return r; +} + +inline static F32x4 & +operator+=(F32x4 &a, const F32x4 &b) +{ + a = a + b; + return a; +} + +inline static F32x4 +operator-(const F32x4 &a) +{ + F32x4 r; +#if defined(SIMD_NEON) + r.simd = vnegq_f32(a.simd); +#elif defined(SIMD_AVX) + r.simd = _mm_sub_ps(_mm_setzero_ps(), a.simd); +#else + r.x = -a.x; r.y = -a.y; r.z = -a.z; r.w = -a.w; +#endif + return r; +} + +inline static F32x4 +operator-(const F32x4 &a, const F32x4 &b) +{ + F32x4 r; +#if defined(SIMD_NEON) + r.simd = vsubq_f32(a.simd, b.simd); +#elif defined(SIMD_AVX) + r.simd = _mm_sub_ps(a.simd, b.simd); +#else + r.x = a.x - b.x; r.y = a.y - b.y; r.z = a.z - b.z; r.w = a.w - b.w; +#endif + return r; +} + +inline static F32x4 & +operator-=(F32x4 &a, const F32x4 &b) +{ + a = a - b; + return a; +} + +inline static F32x4 +operator*(const F32x4 &a, F32 s) +{ + F32x4 r; +#if defined(SIMD_NEON) + r.simd = vmulq_n_f32(a.simd, s); +#elif defined(SIMD_AVX) + r.simd = _mm_mul_ps(a.simd, _mm_set1_ps(s)); +#else + r.x = a.x * s; r.y = a.y * s; r.z = a.z * s; r.w = a.w * s; +#endif + return r; +} + +inline static F32x4 +operator*(F32 s, const F32x4 &a) +{ + return a * s; +} + +inline static F32x4 & 
+operator*=(F32x4 &a, F32 s) +{ + a = a * s; + return a; +} + +inline static F32x4 +operator/(const F32x4 &a, F32 s) +{ + // Multiply by reciprocal — faster than lane-wise divide on most ISAs. + return a * (1.0f / s); +} + +inline static F32x4 & +operator/=(F32x4 &a, F32 s) +{ + a = a / s; + return a; +} + +inline static bool +operator==(const F32x4 &a, const F32x4 &b) +{ + return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w; +} + +// ---- Free functions -------------------------------------------------------- + +inline static F32x4 +f32x4_from_f32(F32 s) +{ + F32x4 r; +#if defined(SIMD_NEON) + r.simd = vdupq_n_f32(s); +#elif defined(SIMD_AVX) + r.simd = _mm_set1_ps(s); +#else + r.x = s; r.y = s; r.z = s; r.w = s; +#endif + return r; +} + +inline static F32 +f32x4_dot(const F32x4 &a, const F32x4 &b) +{ +#if defined(SIMD_NEON) + return vaddvq_f32(vmulq_f32(a.simd, b.simd)); +#elif defined(SIMD_AVX) + // _mm_dp_ps is SSE4.1; guaranteed under AVX baseline. Mask 0xFF = "multiply + // all 4 lanes, sum into lane 0" and extract as scalar. 
+ return _mm_cvtss_f32(_mm_dp_ps(a.simd, b.simd, 0xFF)); +#else + return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; +#endif +} + +inline static F32 +f32x4_length_squared(const F32x4 &a) +{ + return f32x4_dot(a, a); +} + +inline static F32 +f32x4_length(const F32x4 &a) +{ + return f32_sqrt(f32x4_length_squared(a)); +} + +inline static F32x4 +f32x4_normalize(const F32x4 &a) +{ + return a / f32x4_length(a); +} + +inline static F32x4 +f32x4_min(const F32x4 &a, const F32x4 &b) +{ + F32x4 r; +#if defined(SIMD_NEON) + r.simd = vminq_f32(a.simd, b.simd); +#elif defined(SIMD_AVX) + r.simd = _mm_min_ps(a.simd, b.simd); +#else + r.x = f32_min(a.x, b.x); r.y = f32_min(a.y, b.y); + r.z = f32_min(a.z, b.z); r.w = f32_min(a.w, b.w); +#endif + return r; +} + +inline static F32x4 +f32x4_max(const F32x4 &a, const F32x4 &b) +{ + F32x4 r; +#if defined(SIMD_NEON) + r.simd = vmaxq_f32(a.simd, b.simd); +#elif defined(SIMD_AVX) + r.simd = _mm_max_ps(a.simd, b.simd); +#else + r.x = f32_max(a.x, b.x); r.y = f32_max(a.y, b.y); + r.z = f32_max(a.z, b.z); r.w = f32_max(a.w, b.w); +#endif + return r; +} + +inline static F32x4 +f32x4_lerp(const F32x4 &a, const F32x4 &b, F32 t) +{ + return a + (b - a) * t; +} + +inline static F32x4 +f32x4_clamp(const F32x4 &v, const F32x4 &lo, const F32x4 &hi) +{ + return f32x4_min(f32x4_max(v, lo), hi); +} + +inline static bool +f32x4_approx_equal(const F32x4 &a, const F32x4 &b, F32 epsilon) +{ + return f32_approx_equal(a.x, b.x, epsilon) + && f32_approx_equal(a.y, b.y, epsilon) + && f32_approx_equal(a.z, b.z, epsilon) + && f32_approx_equal(a.w, b.w, epsilon); +} + +// ---- Constants ------------------------------------------------------------- + +static constexpr F32x4 F32X4_ZERO = {0.0f, 0.0f, 0.0f, 0.0f}; +static constexpr F32x4 F32X4_ONE = {1.0f, 1.0f, 1.0f, 1.0f}; diff --git a/core/math/f32x4x4.h b/core/math/f32x4x4.h new file mode 100644 index 00000000..903ded63 --- /dev/null +++ b/core/math/f32x4x4.h @@ -0,0 +1,459 @@ +#pragma once + +#include 
+#include +#include +#include + +// ============================================================================ +// F32x4x4 — 4x4 F32 matrix, row-major, SIMD-backed. +// +// Memory layout: [m00 m01 m02 m03 | m10 m11 m12 m13 | m20 m21 m22 m23 | m30 m31 m32 m33] +// Each row is a SIMD-backed F32x4, aligned to 16 bytes. Total: 64 bytes. +// +// Multiplication convention: row-vector. `v * M` transforms the point `v`. +// Translation lives in the last row (m30, m31, m32). See docs/math.md. +// ============================================================================ + +struct alignas(16) F32x4x4 +{ + union + { + struct + { + F32 m00, m01, m02, m03; + F32 m10, m11, m12, m13; + F32 m20, m21, m22, m23; + F32 m30, m31, m32, m33; + }; + F32x4 rows[4]; + }; +}; + +// ---- Indexing -------------------------------------------------------------- + +inline static const F32 & +f32x4x4_at(const F32x4x4 &M, I32 i) +{ + return *((const F32 *)&M + i); +} + +inline static F32 & +f32x4x4_at(F32x4x4 &M, I32 i) +{ + return *((F32 *)&M + i); +} + +// ---- Identity -------------------------------------------------------------- + +inline static F32x4x4 +f32x4x4_identity() +{ + return F32x4x4{ + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + }; +} + +// ---- Element-wise operators ------------------------------------------------ + +inline static F32x4x4 +operator+(const F32x4x4 &A, const F32x4x4 &B) +{ + F32x4x4 R; + R.rows[0] = A.rows[0] + B.rows[0]; + R.rows[1] = A.rows[1] + B.rows[1]; + R.rows[2] = A.rows[2] + B.rows[2]; + R.rows[3] = A.rows[3] + B.rows[3]; + return R; +} + +inline static F32x4x4 & +operator+=(F32x4x4 &A, const F32x4x4 &B) { A = A + B; return A; } + +inline static F32x4x4 +operator-(const F32x4x4 &M) +{ + F32x4x4 R; + R.rows[0] = -M.rows[0]; + R.rows[1] = -M.rows[1]; + R.rows[2] = -M.rows[2]; + R.rows[3] = -M.rows[3]; + return R; +} + +inline static F32x4x4 +operator-(const F32x4x4 &A, const F32x4x4 &B) +{ + 
F32x4x4 R; + R.rows[0] = A.rows[0] - B.rows[0]; + R.rows[1] = A.rows[1] - B.rows[1]; + R.rows[2] = A.rows[2] - B.rows[2]; + R.rows[3] = A.rows[3] - B.rows[3]; + return R; +} + +inline static F32x4x4 & +operator-=(F32x4x4 &A, const F32x4x4 &B) { A = A - B; return A; } + +inline static F32x4x4 +operator*(const F32x4x4 &M, F32 s) +{ + F32x4x4 R; + R.rows[0] = M.rows[0] * s; + R.rows[1] = M.rows[1] * s; + R.rows[2] = M.rows[2] * s; + R.rows[3] = M.rows[3] * s; + return R; +} + +inline static F32x4x4 +operator*(F32 s, const F32x4x4 &M) { return M * s; } + +inline static F32x4x4 & +operator*=(F32x4x4 &M, F32 s) { M = M * s; return M; } + +inline static F32x4x4 +operator/(const F32x4x4 &M, F32 s) { return M * (1.0f / s); } + +inline static F32x4x4 & +operator/=(F32x4x4 &M, F32 s) { M = M / s; return M; } + +inline static bool +operator==(const F32x4x4 &A, const F32x4x4 &B) +{ + return A.rows[0] == B.rows[0] + && A.rows[1] == B.rows[1] + && A.rows[2] == B.rows[2] + && A.rows[3] == B.rows[3]; +} + +// ---- Vec-mat multiply (row-vector convention) ------------------------------ +// v * M = v.x*row0 + v.y*row1 + v.z*row2 + v.w*row3 + +inline static F32x4 +operator*(const F32x4 &v, const F32x4x4 &M) +{ + return v.x * M.rows[0] + v.y * M.rows[1] + v.z * M.rows[2] + v.w * M.rows[3]; +} + +inline static F32x4 & +operator*=(F32x4 &v, const F32x4x4 &M) { v = v * M; return v; } + +// ---- Mat-mat multiply ------------------------------------------------------ +// Row i of (A * B) = A.rows[i] * B + +inline static F32x4x4 +operator*(const F32x4x4 &A, const F32x4x4 &B) +{ + F32x4x4 R; + R.rows[0] = A.rows[0] * B; + R.rows[1] = A.rows[1] * B; + R.rows[2] = A.rows[2] * B; + R.rows[3] = A.rows[3] * B; + return R; +} + +inline static F32x4x4 & +operator*=(F32x4x4 &A, const F32x4x4 &B) { A = A * B; return A; } + +// ---- Transpose / determinant / inverse ------------------------------------- + +inline static F32x4x4 +f32x4x4_transpose(const F32x4x4 &M) +{ + return F32x4x4{ + M.m00, M.m10, 
M.m20, M.m30, + M.m01, M.m11, M.m21, M.m31, + M.m02, M.m12, M.m22, M.m32, + M.m03, M.m13, M.m23, M.m33 + }; +} + +inline static F32 +f32x4x4_determinant(const F32x4x4 &M) +{ + return (M.m00 * M.m11 - M.m01 * M.m10) * (M.m22 * M.m33 - M.m23 * M.m32) + - (M.m00 * M.m12 - M.m02 * M.m10) * (M.m21 * M.m33 - M.m23 * M.m31) + + (M.m00 * M.m13 - M.m03 * M.m10) * (M.m21 * M.m32 - M.m22 * M.m31) + + (M.m01 * M.m12 - M.m02 * M.m11) * (M.m20 * M.m33 - M.m23 * M.m30) + - (M.m01 * M.m13 - M.m03 * M.m11) * (M.m20 * M.m32 - M.m22 * M.m30) + + (M.m02 * M.m13 - M.m03 * M.m12) * (M.m20 * M.m31 - M.m21 * M.m30); +} + +inline static bool +f32x4x4_is_invertible(const F32x4x4 &M) +{ + return f32x4x4_determinant(M) != 0.0f; +} + +inline static F32x4x4 +f32x4x4_inverse(const F32x4x4 &M) +{ + F32 d = f32x4x4_determinant(M); + if (d == 0.0f) + return F32x4x4{}; + + // Matrix of cofactors, transposed (adjoint), divided by determinant. + F32x4x4 adj = F32x4x4{ + // m00 + + M.m11 * (M.m22 * M.m33 - M.m23 * M.m32) + - M.m12 * (M.m21 * M.m33 - M.m23 * M.m31) + + M.m13 * (M.m21 * M.m32 - M.m22 * M.m31), + // m10 + - M.m01 * (M.m22 * M.m33 - M.m23 * M.m32) + + M.m02 * (M.m21 * M.m33 - M.m23 * M.m31) + - M.m03 * (M.m21 * M.m32 - M.m22 * M.m31), + // m20 + + M.m01 * (M.m12 * M.m33 - M.m13 * M.m32) + - M.m02 * (M.m11 * M.m33 - M.m13 * M.m31) + + M.m03 * (M.m11 * M.m32 - M.m12 * M.m31), + // m30 + - M.m01 * (M.m12 * M.m23 - M.m13 * M.m22) + + M.m02 * (M.m11 * M.m23 - M.m13 * M.m21) + - M.m03 * (M.m11 * M.m22 - M.m12 * M.m21), + + // m01 + - M.m10 * (M.m22 * M.m33 - M.m23 * M.m32) + + M.m12 * (M.m20 * M.m33 - M.m23 * M.m30) + - M.m13 * (M.m20 * M.m32 - M.m22 * M.m30), + // m11 + + M.m00 * (M.m22 * M.m33 - M.m23 * M.m32) + - M.m02 * (M.m20 * M.m33 - M.m23 * M.m30) + + M.m03 * (M.m20 * M.m32 - M.m22 * M.m30), + // m21 + - M.m00 * (M.m12 * M.m33 - M.m13 * M.m32) + + M.m02 * (M.m10 * M.m33 - M.m13 * M.m30) + - M.m03 * (M.m10 * M.m32 - M.m12 * M.m30), + // m31 + + M.m00 * (M.m12 * M.m23 - M.m13 * M.m22) + - 
M.m02 * (M.m10 * M.m23 - M.m13 * M.m20) + + M.m03 * (M.m10 * M.m22 - M.m12 * M.m20), + + // m02 + + M.m10 * (M.m21 * M.m33 - M.m23 * M.m31) + - M.m11 * (M.m20 * M.m33 - M.m23 * M.m30) + + M.m13 * (M.m20 * M.m31 - M.m21 * M.m30), + // m12 + - M.m00 * (M.m21 * M.m33 - M.m23 * M.m31) + + M.m01 * (M.m20 * M.m33 - M.m23 * M.m30) + - M.m03 * (M.m20 * M.m31 - M.m21 * M.m30), + // m22 + + M.m00 * (M.m11 * M.m33 - M.m13 * M.m31) + - M.m01 * (M.m10 * M.m33 - M.m13 * M.m30) + + M.m03 * (M.m10 * M.m31 - M.m11 * M.m30), + // m32 + - M.m00 * (M.m11 * M.m23 - M.m13 * M.m21) + + M.m01 * (M.m10 * M.m23 - M.m13 * M.m20) + - M.m03 * (M.m10 * M.m21 - M.m11 * M.m20), + + // m03 + - M.m10 * (M.m21 * M.m32 - M.m22 * M.m31) + + M.m11 * (M.m20 * M.m32 - M.m22 * M.m30) + - M.m12 * (M.m20 * M.m31 - M.m21 * M.m30), + // m13 + + M.m00 * (M.m21 * M.m32 - M.m22 * M.m31) + - M.m01 * (M.m20 * M.m32 - M.m22 * M.m30) + + M.m02 * (M.m20 * M.m31 - M.m21 * M.m30), + // m23 + - M.m00 * (M.m11 * M.m32 - M.m12 * M.m31) + + M.m01 * (M.m10 * M.m32 - M.m12 * M.m30) + - M.m02 * (M.m10 * M.m31 - M.m11 * M.m30), + // m33 + + M.m00 * (M.m11 * M.m22 - M.m12 * M.m21) + - M.m01 * (M.m10 * M.m22 - M.m12 * M.m20) + + M.m02 * (M.m10 * M.m21 - M.m11 * M.m20) + }; + + return adj * (1.0f / d); +} + +// ---- Basis axes (extract rotation axes from a transform) ------------------- + +inline static F32x3 +f32x4x4_axis_x(const F32x4x4 &M) { return f32x3_normalize(F32x3{M.m00, M.m01, M.m02}); } + +inline static F32x3 +f32x4x4_axis_y(const F32x4x4 &M) { return f32x3_normalize(F32x3{M.m10, M.m11, M.m12}); } + +inline static F32x3 +f32x4x4_axis_z(const F32x4x4 &M) { return f32x3_normalize(F32x3{M.m20, M.m21, M.m22}); } + +// ---- TRS builders ---------------------------------------------------------- + +inline static F32x4x4 +f32x4x4_translation(F32 dx, F32 dy, F32 dz) +{ + return F32x4x4{ + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + dx, dy, dz, 1.0f + }; +} + +inline static F32x4x4 
+f32x4x4_translation(const F32x3 &t) { return f32x4x4_translation(t.x, t.y, t.z); } + +inline static F32x4x4 +f32x4x4_rotation_x(F32 angle_in_radians) +{ + F32 c = f32_cos(angle_in_radians); + F32 s = f32_sin(angle_in_radians); + return F32x4x4{ + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, c, s, 0.0f, + 0.0f, -s, c, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + }; +} + +inline static F32x4x4 +f32x4x4_rotation_y(F32 angle_in_radians) +{ + F32 c = f32_cos(angle_in_radians); + F32 s = f32_sin(angle_in_radians); + return F32x4x4{ + c, 0.0f, -s, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + s, 0.0f, c, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + }; +} + +inline static F32x4x4 +f32x4x4_rotation_z(F32 angle_in_radians) +{ + F32 c = f32_cos(angle_in_radians); + F32 s = f32_sin(angle_in_radians); + return F32x4x4{ + c, s, 0.0f, 0.0f, + -s, c, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + }; +} + +inline static F32x4x4 +f32x4x4_scaling(F32 sx, F32 sy, F32 sz) +{ + return F32x4x4{ + sx, 0.0f, 0.0f, 0.0f, + 0.0f, sy, 0.0f, 0.0f, + 0.0f, 0.0f, sz, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + }; +} + +inline static F32x4x4 +f32x4x4_scaling(const F32x3 &s) { return f32x4x4_scaling(s.x, s.y, s.z); } + +// ---- Projection / view builders -------------------------------------------- +// Canonical convention: right-handed, Y-up world, Y-up NDC, Z in [0, 1]. + +// Projection helpers emit Y-up clip space (Metal / D3D12 / OpenGL native). +// For Vulkan (Y-down native NDC), the RHI backend handles the mismatch via a +// negative-height viewport (`VK_KHR_maintenance1`) so shaders stay backend- +// agnostic. See notes in engine/src/rhi/rhi_vulkan.cpp. 
+ +inline static F32x4x4 +f32x4x4_orthographic(F32 left, F32 right, F32 bottom, F32 top, F32 znear, F32 zfar) +{ + F32x4x4 M = {}; + M.m00 = 2.0f / (right - left); + M.m30 = -(right + left) / (right - left); + + M.m11 = 2.0f / (top - bottom); + M.m31 = -(top + bottom) / (top - bottom); + + M.m22 = -1.0f / (zfar - znear); + M.m32 = -znear / (zfar - znear); + + M.m33 = 1.0f; + return M; +} + +inline static F32x4x4 +f32x4x4_perspective(F32 fovy_radians, F32 aspect, F32 znear, F32 zfar) +{ + F32x4x4 M = {}; + F32 h = f32_tan(fovy_radians * 0.5f); + F32 w = aspect * h; + + M.m00 = 1.0f / w; + M.m11 = 1.0f / h; + M.m22 = -zfar / (zfar - znear); + M.m23 = -1.0f; + M.m32 = -(zfar * znear) / (zfar - znear); + return M; +} + +inline static F32x4x4 +f32x4x4_look_at(const F32x3 &eye, const F32x3 &target, const F32x3 &up) +{ + F32x3 axis_z = f32x3_normalize(eye - target); + F32x3 axis_x = f32x3_normalize(f32x3_cross(up, axis_z)); + F32x3 axis_y = f32x3_cross(axis_z, axis_x); + + F32x3 t = { + -f32x3_dot(eye, axis_x), + -f32x3_dot(eye, axis_y), + -f32x3_dot(eye, axis_z) + }; + + return F32x4x4{ + axis_x.x, axis_y.x, axis_z.x, 0.0f, + axis_x.y, axis_y.y, axis_z.y, 0.0f, + axis_x.z, axis_y.z, axis_z.z, 0.0f, + t.x, t.y, t.z, 1.0f + }; +} + +// ---- Screen / view-space helpers ------------------------------------------- +// `viewport` is packed as F32x4{x, y, width, height}. NDC convention is Y-up, +// Z in [0,1] — screen space returned by project is pixel-space with Y matching +// the viewport's orientation. 
+ +inline static F32x3 +f32x3_project(const F32x3 &world, const F32x4x4 &view_projection, const F32x4 &viewport) +{ + F32x4 clip = F32x4{world.x, world.y, world.z, 1.0f} * view_projection; + if (clip.w == 0.0f) + return F32x3{0.0f, 0.0f, 0.0f}; + + F32x3 ndc = {clip.x / clip.w, clip.y / clip.w, clip.z / clip.w}; + + return F32x3{ + viewport.x + (ndc.x * 0.5f + 0.5f) * viewport.z, + viewport.y + (ndc.y * 0.5f + 0.5f) * viewport.w, + ndc.z + }; +} + +inline static F32x3 +f32x3_unproject(const F32x3 &screen, const F32x4x4 &view_projection_inverse, const F32x4 &viewport) +{ + F32x4 ndc = F32x4{ + ((screen.x - viewport.x) / viewport.z) * 2.0f - 1.0f, + ((screen.y - viewport.y) / viewport.w) * 2.0f - 1.0f, + screen.z, + 1.0f + }; + F32x4 world = ndc * view_projection_inverse; + if (world.w == 0.0f) + return F32x3{0.0f, 0.0f, 0.0f}; + return F32x3{world.x / world.w, world.y / world.w, world.z / world.w}; +} + +// ---- Approx equality ------------------------------------------------------- + +inline static bool +f32x4x4_approx_equal(const F32x4x4 &A, const F32x4x4 &B, F32 epsilon) +{ + return f32x4_approx_equal(A.rows[0], B.rows[0], epsilon) + && f32x4_approx_equal(A.rows[1], B.rows[1], epsilon) + && f32x4_approx_equal(A.rows[2], B.rows[2], epsilon) + && f32x4_approx_equal(A.rows[3], B.rows[3], epsilon); +} diff --git a/core/math/f64.h b/core/math/f64.h new file mode 100644 index 00000000..37086d42 --- /dev/null +++ b/core/math/f64.h @@ -0,0 +1,259 @@ +#pragma once + +#include + +#include +#include + +// ============================================================================ +// F64 scalar helpers + constants — full prefixed mirror of F32.h. 
+// ============================================================================ + +// ---- Angular constants ----------------------------------------------------- + +static constexpr F64 F64_PI = 3.14159265358979323846; +static constexpr F64 F64_TAU = F64_PI * 2.0; +static constexpr F64 F64_PI_OVER_2 = F64_PI * 0.5; +static constexpr F64 F64_TO_DEGREES = 360.0 / F64_TAU; +static constexpr F64 F64_TO_RADIANS = F64_TAU / 360.0; + +// ---- Numeric limits / special values --------------------------------------- +// F64_MIN / F64_MAX live in core/defines.h. + +static constexpr F64 F64_EPSILON = std::numeric_limits::epsilon(); +static constexpr F64 F64_INFINITY = std::numeric_limits::infinity(); +static constexpr F64 F64_NEG_INFINITY = -F64_INFINITY; +static constexpr F64 F64_NAN = std::numeric_limits::quiet_NaN(); + +// ---- Transcendental / trigonometric wrappers ------------------------------- + +inline static F64 +f64_sqrt(F64 x) +{ + return ::sqrt(x); +} + +inline static F64 +f64_sin(F64 x) +{ + return ::sin(x); +} + +inline static F64 +f64_asin(F64 x) +{ + return ::asin(x); +} + +inline static F64 +f64_cos(F64 x) +{ + return ::cos(x); +} + +inline static F64 +f64_acos(F64 x) +{ + return ::acos(x); +} + +inline static F64 +f64_tan(F64 x) +{ + return ::tan(x); +} + +inline static F64 +f64_atan2(F64 y, F64 x) +{ + return ::atan2(y, x); +} + +inline static F64 +f64_power(F64 base, F64 exponent) +{ + return ::pow(base, exponent); +} + +inline static F64 +f64_modulo(F64 x, F64 divisor) +{ + return ::fmod(x, divisor); +} + +// ---- Basic arithmetic helpers ---------------------------------------------- + +inline static F64 +f64_abs(F64 x) +{ + return x < 0.0 ? -x : x; +} + +inline static F64 +f64_sign(F64 x) +{ + if (x > 0.0) return 1.0; + if (x < 0.0) return -1.0; + return 0.0; +} + +inline static F64 +f64_min(F64 a, F64 b) +{ + return a < b ? a : b; +} + +inline static F64 +f64_max(F64 a, F64 b) +{ + return a > b ? 
a : b; +} + +inline static F64 +f64_clamp(F64 x, F64 lo, F64 hi) +{ + if (x < lo) return lo; + if (x > hi) return hi; + return x; +} + +inline static F64 +f64_lerp(F64 a, F64 b, F64 t) +{ + return a + t * (b - a); +} + +// ---- Special-value tests --------------------------------------------------- + +inline static bool +f64_is_nan(F64 x) +{ + return x != x; +} + +inline static bool +f64_is_infinite(F64 x) +{ + return x == F64_INFINITY || x == F64_NEG_INFINITY; +} + +inline static bool +f64_is_finite(F64 x) +{ + return !f64_is_nan(x) && !f64_is_infinite(x); +} + +inline static bool +f64_approx_equal(F64 a, F64 b, F64 epsilon) +{ + return f64_abs(a - b) <= epsilon; +} + +// ---- Interpolation beyond lerp --------------------------------------------- + +inline static F64 +f64_smoothstep(F64 edge0, F64 edge1, F64 x) +{ + F64 t = f64_clamp((x - edge0) / (edge1 - edge0), 0.0, 1.0); + return t * t * (3.0 - 2.0 * t); +} + +inline static F64 +f64_smootherstep(F64 edge0, F64 edge1, F64 x) +{ + F64 t = f64_clamp((x - edge0) / (edge1 - edge0), 0.0, 1.0); + return t * t * t * (t * (t * 6.0 - 15.0) + 10.0); +} + +inline static F64 +f64_smooth_damp(F64 current, F64 target, F64 *velocity, F64 smooth_time, F64 dt) +{ + smooth_time = f64_max(smooth_time, 0.0001); + F64 omega = 2.0 / smooth_time; + F64 x = omega * dt; + F64 exp_ = 1.0 / (1.0 + x + 0.48 * x * x + 0.235 * x * x * x); + F64 delta = current - target; + F64 temp = (*velocity + omega * delta) * dt; + *velocity = (*velocity - omega * temp) * exp_; + return target + (delta + temp) * exp_; +} + +// ---- Easing curves --------------------------------------------------------- + +inline static F64 +f64_ease_in_quad(F64 t) +{ + t = f64_clamp(t, 0.0, 1.0); + return t * t; +} + +inline static F64 +f64_ease_out_quad(F64 t) +{ + t = f64_clamp(t, 0.0, 1.0); + return 1.0 - (1.0 - t) * (1.0 - t); +} + +inline static F64 +f64_ease_in_out_quad(F64 t) +{ + t = f64_clamp(t, 0.0, 1.0); + return t < 0.5 ? 
2.0 * t * t : 1.0 - 2.0 * (1.0 - t) * (1.0 - t); +} + +inline static F64 +f64_ease_in_cubic(F64 t) +{ + t = f64_clamp(t, 0.0, 1.0); + return t * t * t; +} + +inline static F64 +f64_ease_out_cubic(F64 t) +{ + t = f64_clamp(t, 0.0, 1.0); + F64 one_minus = 1.0 - t; + return 1.0 - one_minus * one_minus * one_minus; +} + +inline static F64 +f64_ease_in_out_cubic(F64 t) +{ + t = f64_clamp(t, 0.0, 1.0); + if (t < 0.5) + return 4.0 * t * t * t; + F64 one_minus = 1.0 - t; + return 1.0 - 4.0 * one_minus * one_minus * one_minus; +} + +inline static F64 +f64_ease_in_elastic(F64 t) +{ + t = f64_clamp(t, 0.0, 1.0); + if (t == 0.0) return 0.0; + if (t == 1.0) return 1.0; + constexpr F64 c4 = F64_TAU / 3.0; + return -f64_power(2.0, 10.0 * t - 10.0) * f64_sin((t * 10.0 - 10.75) * c4); +} + +inline static F64 +f64_ease_out_elastic(F64 t) +{ + t = f64_clamp(t, 0.0, 1.0); + if (t == 0.0) return 0.0; + if (t == 1.0) return 1.0; + constexpr F64 c4 = F64_TAU / 3.0; + return f64_power(2.0, -10.0 * t) * f64_sin((t * 10.0 - 0.75) * c4) + 1.0; +} + +inline static F64 +f64_ease_in_out_elastic(F64 t) +{ + t = f64_clamp(t, 0.0, 1.0); + if (t == 0.0) return 0.0; + if (t == 1.0) return 1.0; + constexpr F64 c5 = F64_TAU / 4.5; + if (t < 0.5) + return -(f64_power(2.0, 20.0 * t - 10.0) * f64_sin((20.0 * t - 11.125) * c5)) * 0.5; + return (f64_power(2.0, -20.0 * t + 10.0) * f64_sin((20.0 * t - 11.125) * c5)) * 0.5 + 1.0; +} diff --git a/core/math/f64x2.h b/core/math/f64x2.h new file mode 100644 index 00000000..4cef7bb0 --- /dev/null +++ b/core/math/f64x2.h @@ -0,0 +1,197 @@ +#pragma once + +#include +#include + +// ============================================================================ +// F64x2 — 2D F64 vector, SIMD-backed (fits naturally in 128-bit register). 
+// ============================================================================ + +#if defined(SIMD_FORCE_SCALAR) + struct Simd_F64x2 { F64 v[2]; }; +#elif defined(SIMD_NEON) + #include + typedef float64x2_t Simd_F64x2; +#elif defined(SIMD_AVX) + #include + typedef __m128d Simd_F64x2; +#else + struct Simd_F64x2 { F64 v[2]; }; +#endif + +struct alignas(16) F64x2 +{ + union + { + struct { F64 x, y; }; + Simd_F64x2 simd; + }; +}; + +// ---- Operators ------------------------------------------------------------- + +inline static F64x2 +operator+(const F64x2 &a, const F64x2 &b) +{ + F64x2 r; +#if defined(SIMD_NEON) + r.simd = vaddq_f64(a.simd, b.simd); +#elif defined(SIMD_AVX) + r.simd = _mm_add_pd(a.simd, b.simd); +#else + r.x = a.x + b.x; r.y = a.y + b.y; +#endif + return r; +} + +inline static F64x2 & +operator+=(F64x2 &a, const F64x2 &b) { a = a + b; return a; } + +inline static F64x2 +operator-(const F64x2 &a) +{ + F64x2 r; +#if defined(SIMD_NEON) + r.simd = vnegq_f64(a.simd); +#elif defined(SIMD_AVX) + r.simd = _mm_sub_pd(_mm_setzero_pd(), a.simd); +#else + r.x = -a.x; r.y = -a.y; +#endif + return r; +} + +inline static F64x2 +operator-(const F64x2 &a, const F64x2 &b) +{ + F64x2 r; +#if defined(SIMD_NEON) + r.simd = vsubq_f64(a.simd, b.simd); +#elif defined(SIMD_AVX) + r.simd = _mm_sub_pd(a.simd, b.simd); +#else + r.x = a.x - b.x; r.y = a.y - b.y; +#endif + return r; +} + +inline static F64x2 & +operator-=(F64x2 &a, const F64x2 &b) { a = a - b; return a; } + +inline static F64x2 +operator*(const F64x2 &a, F64 s) +{ + F64x2 r; +#if defined(SIMD_NEON) + r.simd = vmulq_n_f64(a.simd, s); +#elif defined(SIMD_AVX) + r.simd = _mm_mul_pd(a.simd, _mm_set1_pd(s)); +#else + r.x = a.x * s; r.y = a.y * s; +#endif + return r; +} + +inline static F64x2 +operator*(F64 s, const F64x2 &a) { return a * s; } + +inline static F64x2 & +operator*=(F64x2 &a, F64 s) { a = a * s; return a; } + +inline static F64x2 +operator/(const F64x2 &a, F64 s) { return a * (1.0 / s); } + +inline 
static F64x2 & +operator/=(F64x2 &a, F64 s) { a = a / s; return a; } + +inline static bool +operator==(const F64x2 &a, const F64x2 &b) { return a.x == b.x && a.y == b.y; } + +// ---- Free functions -------------------------------------------------------- + +inline static F64x2 +f64x2_from_f64(F64 s) +{ + F64x2 r; +#if defined(SIMD_NEON) + r.simd = vdupq_n_f64(s); +#elif defined(SIMD_AVX) + r.simd = _mm_set1_pd(s); +#else + r.x = s; r.y = s; +#endif + return r; +} + +inline static F64 +f64x2_dot(const F64x2 &a, const F64x2 &b) +{ +#if defined(SIMD_NEON) + return vaddvq_f64(vmulq_f64(a.simd, b.simd)); +#elif defined(SIMD_AVX) + return _mm_cvtsd_f64(_mm_dp_pd(a.simd, b.simd, 0x31)); +#else + return a.x * b.x + a.y * b.y; +#endif +} + +inline static F64 +f64x2_cross(const F64x2 &a, const F64x2 &b) +{ + return a.x * b.y - a.y * b.x; +} + +inline static F64 +f64x2_length_squared(const F64x2 &a) { return f64x2_dot(a, a); } + +inline static F64 +f64x2_length(const F64x2 &a) { return f64_sqrt(f64x2_length_squared(a)); } + +inline static F64x2 +f64x2_normalize(const F64x2 &a) { return a / f64x2_length(a); } + +inline static F64x2 +f64x2_min(const F64x2 &a, const F64x2 &b) +{ + F64x2 r; +#if defined(SIMD_NEON) + r.simd = vminq_f64(a.simd, b.simd); +#elif defined(SIMD_AVX) + r.simd = _mm_min_pd(a.simd, b.simd); +#else + r.x = f64_min(a.x, b.x); r.y = f64_min(a.y, b.y); +#endif + return r; +} + +inline static F64x2 +f64x2_max(const F64x2 &a, const F64x2 &b) +{ + F64x2 r; +#if defined(SIMD_NEON) + r.simd = vmaxq_f64(a.simd, b.simd); +#elif defined(SIMD_AVX) + r.simd = _mm_max_pd(a.simd, b.simd); +#else + r.x = f64_max(a.x, b.x); r.y = f64_max(a.y, b.y); +#endif + return r; +} + +inline static F64x2 +f64x2_lerp(const F64x2 &a, const F64x2 &b, F64 t) { return a + (b - a) * t; } + +inline static F64x2 +f64x2_clamp(const F64x2 &v, const F64x2 &lo, const F64x2 &hi) { return f64x2_min(f64x2_max(v, lo), hi); } + +inline static bool +f64x2_approx_equal(const F64x2 &a, const F64x2 &b, 
F64 epsilon) +{ + return f64_approx_equal(a.x, b.x, epsilon) + && f64_approx_equal(a.y, b.y, epsilon); +} + +// ---- Constants ------------------------------------------------------------- + +static constexpr F64x2 F64X2_ZERO = {0.0, 0.0}; +static constexpr F64x2 F64X2_ONE = {1.0, 1.0}; diff --git a/core/math/f64x2x2.h b/core/math/f64x2x2.h new file mode 100644 index 00000000..3e866f7a --- /dev/null +++ b/core/math/f64x2x2.h @@ -0,0 +1,105 @@ +#pragma once + +#include +#include +#include + +// ============================================================================ +// F64x2x2 — 2x2 F64 matrix, row-major, scalar (2x2 is not worth SIMD). +// Memory layout: [m00 m01 | m10 m11], 32 bytes. +// ============================================================================ + +struct F64x2x2 +{ + F64 m00, m01; + F64 m10, m11; +}; + +inline static F64x2x2 +f64x2x2_identity() +{ + return F64x2x2{1.0, 0.0, 0.0, 1.0}; +} + +// ---- Operators ------------------------------------------------------------- + +inline static F64x2x2 +operator+(const F64x2x2 &A, const F64x2x2 &B) +{ + return F64x2x2{A.m00 + B.m00, A.m01 + B.m01, A.m10 + B.m10, A.m11 + B.m11}; +} + +inline static F64x2x2 +operator-(const F64x2x2 &M) +{ + return F64x2x2{-M.m00, -M.m01, -M.m10, -M.m11}; +} + +inline static F64x2x2 +operator-(const F64x2x2 &A, const F64x2x2 &B) +{ + return F64x2x2{A.m00 - B.m00, A.m01 - B.m01, A.m10 - B.m10, A.m11 - B.m11}; +} + +inline static F64x2x2 +operator*(const F64x2x2 &M, F64 s) +{ + return F64x2x2{M.m00 * s, M.m01 * s, M.m10 * s, M.m11 * s}; +} + +inline static F64x2x2 +operator*(F64 s, const F64x2x2 &M) { return M * s; } + +inline static F64x2x2 +operator/(const F64x2x2 &M, F64 s) { return M * (1.0 / s); } + +inline static bool +operator==(const F64x2x2 &A, const F64x2x2 &B) +{ + return A.m00 == B.m00 && A.m01 == B.m01 && A.m10 == B.m10 && A.m11 == B.m11; +} + +inline static F64x2 +operator*(const F64x2 &v, const F64x2x2 &M) +{ + return F64x2{v.x * M.m00 + v.y * M.m10, v.x * 
M.m01 + v.y * M.m11}; +} + +inline static F64x2x2 +operator*(const F64x2x2 &A, const F64x2x2 &B) +{ + return F64x2x2{ + A.m00 * B.m00 + A.m01 * B.m10, A.m00 * B.m01 + A.m01 * B.m11, + A.m10 * B.m00 + A.m11 * B.m10, A.m10 * B.m01 + A.m11 * B.m11 + }; +} + +// ---- Transpose / determinant / inverse ------------------------------------- + +inline static F64x2x2 +f64x2x2_transpose(const F64x2x2 &M) +{ + return F64x2x2{M.m00, M.m10, M.m01, M.m11}; +} + +inline static F64 +f64x2x2_determinant(const F64x2x2 &M) +{ + return M.m00 * M.m11 - M.m01 * M.m10; +} + +inline static bool +f64x2x2_is_invertible(const F64x2x2 &M) +{ + return f64x2x2_determinant(M) != 0.0; +} + +inline static F64x2x2 +f64x2x2_inverse(const F64x2x2 &M) +{ + F64 d = f64x2x2_determinant(M); + if (d == 0.0) + return F64x2x2{}; + F64 inv_d = 1.0 / d; + return F64x2x2{ M.m11 * inv_d, -M.m01 * inv_d, -M.m10 * inv_d, M.m00 * inv_d }; +} diff --git a/core/math/f64x3.h b/core/math/f64x3.h new file mode 100644 index 00000000..8ee58905 --- /dev/null +++ b/core/math/f64x3.h @@ -0,0 +1,106 @@ +#pragma once + +#include +#include + +// ============================================================================ +// F64x3 — 3D F64 vector. Packed 24 bytes (no alignas) for GPU-attribute interop. 
+// ============================================================================ + +struct F64x3 +{ + F64 x, y, z; +}; + +// ---- Operators ------------------------------------------------------------- + +inline static F64x3 +operator+(const F64x3 &a, const F64x3 &b) { return F64x3{a.x + b.x, a.y + b.y, a.z + b.z}; } + +inline static F64x3 & +operator+=(F64x3 &a, const F64x3 &b) { a = a + b; return a; } + +inline static F64x3 +operator-(const F64x3 &a) { return F64x3{-a.x, -a.y, -a.z}; } + +inline static F64x3 +operator-(const F64x3 &a, const F64x3 &b) { return F64x3{a.x - b.x, a.y - b.y, a.z - b.z}; } + +inline static F64x3 & +operator-=(F64x3 &a, const F64x3 &b) { a = a - b; return a; } + +inline static F64x3 +operator*(const F64x3 &a, F64 s) { return F64x3{a.x * s, a.y * s, a.z * s}; } + +inline static F64x3 +operator*(F64 s, const F64x3 &a) { return a * s; } + +inline static F64x3 & +operator*=(F64x3 &a, F64 s) { a = a * s; return a; } + +inline static F64x3 +operator/(const F64x3 &a, F64 s) { return a * (1.0 / s); } + +inline static F64x3 & +operator/=(F64x3 &a, F64 s) { a = a / s; return a; } + +inline static bool +operator==(const F64x3 &a, const F64x3 &b) { return a.x == b.x && a.y == b.y && a.z == b.z; } + +// ---- Free functions -------------------------------------------------------- + +inline static F64x3 +f64x3_from_f64(F64 s) { return F64x3{s, s, s}; } + +inline static F64 +f64x3_dot(const F64x3 &a, const F64x3 &b) { return a.x * b.x + a.y * b.y + a.z * b.z; } + +inline static F64x3 +f64x3_cross(const F64x3 &a, const F64x3 &b) +{ + return F64x3{ + a.y * b.z - a.z * b.y, + a.z * b.x - a.x * b.z, + a.x * b.y - a.y * b.x + }; +} + +inline static F64 +f64x3_length_squared(const F64x3 &a) { return f64x3_dot(a, a); } + +inline static F64 +f64x3_length(const F64x3 &a) { return f64_sqrt(f64x3_length_squared(a)); } + +inline static F64x3 +f64x3_normalize(const F64x3 &a) { return a / f64x3_length(a); } + +inline static F64x3 +f64x3_min(const F64x3 &a, const 
F64x3 &b) { return F64x3{f64_min(a.x, b.x), f64_min(a.y, b.y), f64_min(a.z, b.z)}; } + +inline static F64x3 +f64x3_max(const F64x3 &a, const F64x3 &b) { return F64x3{f64_max(a.x, b.x), f64_max(a.y, b.y), f64_max(a.z, b.z)}; } + +inline static F64x3 +f64x3_lerp(const F64x3 &a, const F64x3 &b, F64 t) { return F64x3{f64_lerp(a.x, b.x, t), f64_lerp(a.y, b.y, t), f64_lerp(a.z, b.z, t)}; } + +inline static F64x3 +f64x3_clamp(const F64x3 &v, const F64x3 &lo, const F64x3 &hi) { return f64x3_min(f64x3_max(v, lo), hi); } + +inline static bool +f64x3_approx_equal(const F64x3 &a, const F64x3 &b, F64 epsilon) +{ + return f64_approx_equal(a.x, b.x, epsilon) + && f64_approx_equal(a.y, b.y, epsilon) + && f64_approx_equal(a.z, b.z, epsilon); +} + +// ---- Constants ------------------------------------------------------------- + +static constexpr F64x3 F64X3_ZERO = { 0.0, 0.0, 0.0}; +static constexpr F64x3 F64X3_ONE = { 1.0, 1.0, 1.0}; +static constexpr F64x3 F64X3_RIGHT = { 1.0, 0.0, 0.0}; +static constexpr F64x3 F64X3_LEFT = {-1.0, 0.0, 0.0}; +static constexpr F64x3 F64X3_UP = { 0.0, 1.0, 0.0}; +static constexpr F64x3 F64X3_DOWN = { 0.0, -1.0, 0.0}; +static constexpr F64x3 F64X3_FORWARD = { 0.0, 0.0, -1.0}; +static constexpr F64x3 F64X3_BACKWARD = { 0.0, 0.0, 1.0}; diff --git a/core/math/f64x3x3.h b/core/math/f64x3x3.h new file mode 100644 index 00000000..11666759 --- /dev/null +++ b/core/math/f64x3x3.h @@ -0,0 +1,151 @@ +#pragma once + +#include +#include +#include +#include // For padded F64x4 row storage. + +// ============================================================================ +// F64x3x3 — 3x3 F64 matrix, row-major, SIMD-backed. +// Storage: 3 × F64x4 padded rows (96 bytes total, matches std140 / MSL layout). 
+// ============================================================================ + +struct alignas(32) F64x3x3 +{ + union + { + struct + { + F64 m00, m01, m02, _pad0; + F64 m10, m11, m12, _pad1; + F64 m20, m21, m22, _pad2; + }; + F64x4 rows[3]; + }; +}; + +inline static F64x3x3 +f64x3x3_identity() +{ + return F64x3x3{ + 1.0, 0.0, 0.0, 0.0, + 0.0, 1.0, 0.0, 0.0, + 0.0, 0.0, 1.0, 0.0 + }; +} + +// ---- Operators ------------------------------------------------------------- + +inline static F64x3x3 operator+(const F64x3x3 &A, const F64x3x3 &B) +{ + F64x3x3 R; + R.rows[0] = A.rows[0] + B.rows[0]; + R.rows[1] = A.rows[1] + B.rows[1]; + R.rows[2] = A.rows[2] + B.rows[2]; + return R; +} + +inline static F64x3x3 operator-(const F64x3x3 &M) +{ + F64x3x3 R; + R.rows[0] = -M.rows[0]; + R.rows[1] = -M.rows[1]; + R.rows[2] = -M.rows[2]; + return R; +} + +inline static F64x3x3 operator-(const F64x3x3 &A, const F64x3x3 &B) +{ + F64x3x3 R; + R.rows[0] = A.rows[0] - B.rows[0]; + R.rows[1] = A.rows[1] - B.rows[1]; + R.rows[2] = A.rows[2] - B.rows[2]; + return R; +} + +inline static F64x3x3 operator*(const F64x3x3 &M, F64 s) +{ + F64x3x3 R; + R.rows[0] = M.rows[0] * s; + R.rows[1] = M.rows[1] * s; + R.rows[2] = M.rows[2] * s; + return R; +} + +inline static F64x3x3 operator*(F64 s, const F64x3x3 &M) { return M * s; } +inline static F64x3x3 operator/(const F64x3x3 &M, F64 s) { return M * (1.0 / s); } + +inline static bool operator==(const F64x3x3 &A, const F64x3x3 &B) +{ + return A.m00 == B.m00 && A.m01 == B.m01 && A.m02 == B.m02 + && A.m10 == B.m10 && A.m11 == B.m11 && A.m12 == B.m12 + && A.m20 == B.m20 && A.m21 == B.m21 && A.m22 == B.m22; +} + +inline static F64x3 +operator*(const F64x3 &v, const F64x3x3 &M) +{ + return F64x3{ + v.x * M.m00 + v.y * M.m10 + v.z * M.m20, + v.x * M.m01 + v.y * M.m11 + v.z * M.m21, + v.x * M.m02 + v.y * M.m12 + v.z * M.m22 + }; +} + +inline static F64x3x3 +operator*(const F64x3x3 &A, const F64x3x3 &B) +{ + F64x3x3 R; + + R.m00 = A.m00 * B.m00 + A.m01 * 
B.m10 + A.m02 * B.m20; + R.m01 = A.m00 * B.m01 + A.m01 * B.m11 + A.m02 * B.m21; + R.m02 = A.m00 * B.m02 + A.m01 * B.m12 + A.m02 * B.m22; + R._pad0 = 0.0; + + R.m10 = A.m10 * B.m00 + A.m11 * B.m10 + A.m12 * B.m20; + R.m11 = A.m10 * B.m01 + A.m11 * B.m11 + A.m12 * B.m21; + R.m12 = A.m10 * B.m02 + A.m11 * B.m12 + A.m12 * B.m22; + R._pad1 = 0.0; + + R.m20 = A.m20 * B.m00 + A.m21 * B.m10 + A.m22 * B.m20; + R.m21 = A.m20 * B.m01 + A.m21 * B.m11 + A.m22 * B.m21; + R.m22 = A.m20 * B.m02 + A.m21 * B.m12 + A.m22 * B.m22; + R._pad2 = 0.0; + + return R; +} + +inline static F64x3x3 +f64x3x3_transpose(const F64x3x3 &M) +{ + return F64x3x3{ + M.m00, M.m10, M.m20, 0.0, + M.m01, M.m11, M.m21, 0.0, + M.m02, M.m12, M.m22, 0.0 + }; +} + +inline static F64 +f64x3x3_determinant(const F64x3x3 &M) +{ + return M.m00 * (M.m11 * M.m22 - M.m12 * M.m21) + - M.m01 * (M.m10 * M.m22 - M.m12 * M.m20) + + M.m02 * (M.m10 * M.m21 - M.m11 * M.m20); +} + +inline static bool +f64x3x3_is_invertible(const F64x3x3 &M) { return f64x3x3_determinant(M) != 0.0; } + +inline static F64x3x3 +f64x3x3_inverse(const F64x3x3 &M) +{ + F64 d = f64x3x3_determinant(M); + if (d == 0.0) + return F64x3x3{}; + F64 inv_d = 1.0 / d; + return F64x3x3{ + (M.m11 * M.m22 - M.m12 * M.m21) * inv_d, -(M.m01 * M.m22 - M.m02 * M.m21) * inv_d, (M.m01 * M.m12 - M.m02 * M.m11) * inv_d, 0.0, + -(M.m10 * M.m22 - M.m12 * M.m20) * inv_d, (M.m00 * M.m22 - M.m02 * M.m20) * inv_d, -(M.m00 * M.m12 - M.m02 * M.m10) * inv_d, 0.0, + (M.m10 * M.m21 - M.m11 * M.m20) * inv_d, -(M.m00 * M.m21 - M.m01 * M.m20) * inv_d, (M.m00 * M.m11 - M.m01 * M.m10) * inv_d, 0.0 + }; +} diff --git a/core/math/f64x4.h b/core/math/f64x4.h new file mode 100644 index 00000000..b28fff87 --- /dev/null +++ b/core/math/f64x4.h @@ -0,0 +1,216 @@ +#pragma once + +#include +#include +#include // For Simd_F64x2 when composing Simd_F64x4 on NEON. + +// ============================================================================ +// F64x4 — 4D F64 vector, 32 bytes, SIMD-backed. 
+// +// - x86_64 AVX: single __m256d register. +// - ARM64 NEON: two float64x2_t packed lane-wise (NEON has no native 4-wide +// double register). +// ============================================================================ + +#if defined(SIMD_FORCE_SCALAR) + struct Simd_F64x4 { F64 v[4]; }; +#elif defined(SIMD_NEON) + #include + struct Simd_F64x4 + { + float64x2_t lo; // lanes x, y + float64x2_t hi; // lanes z, w + }; +#elif defined(SIMD_AVX) + #include + typedef __m256d Simd_F64x4; +#else + struct Simd_F64x4 { F64 v[4]; }; +#endif + +struct alignas(32) F64x4 +{ + union + { + struct { F64 x, y, z, w; }; + Simd_F64x4 simd; + }; +}; + +// ---- Operators ------------------------------------------------------------- + +inline static F64x4 +operator+(const F64x4 &a, const F64x4 &b) +{ + F64x4 r; +#if defined(SIMD_NEON) + r.simd.lo = vaddq_f64(a.simd.lo, b.simd.lo); + r.simd.hi = vaddq_f64(a.simd.hi, b.simd.hi); +#elif defined(SIMD_AVX) + r.simd = _mm256_add_pd(a.simd, b.simd); +#else + r.x = a.x + b.x; r.y = a.y + b.y; r.z = a.z + b.z; r.w = a.w + b.w; +#endif + return r; +} + +inline static F64x4 & +operator+=(F64x4 &a, const F64x4 &b) { a = a + b; return a; } + +inline static F64x4 +operator-(const F64x4 &a) +{ + F64x4 r; +#if defined(SIMD_NEON) + r.simd.lo = vnegq_f64(a.simd.lo); + r.simd.hi = vnegq_f64(a.simd.hi); +#elif defined(SIMD_AVX) + r.simd = _mm256_sub_pd(_mm256_setzero_pd(), a.simd); +#else + r.x = -a.x; r.y = -a.y; r.z = -a.z; r.w = -a.w; +#endif + return r; +} + +inline static F64x4 +operator-(const F64x4 &a, const F64x4 &b) +{ + F64x4 r; +#if defined(SIMD_NEON) + r.simd.lo = vsubq_f64(a.simd.lo, b.simd.lo); + r.simd.hi = vsubq_f64(a.simd.hi, b.simd.hi); +#elif defined(SIMD_AVX) + r.simd = _mm256_sub_pd(a.simd, b.simd); +#else + r.x = a.x - b.x; r.y = a.y - b.y; r.z = a.z - b.z; r.w = a.w - b.w; +#endif + return r; +} + +inline static F64x4 & +operator-=(F64x4 &a, const F64x4 &b) { a = a - b; return a; } + +inline static F64x4 +operator*(const F64x4 
&a, F64 s) +{ + F64x4 r; +#if defined(SIMD_NEON) + r.simd.lo = vmulq_n_f64(a.simd.lo, s); + r.simd.hi = vmulq_n_f64(a.simd.hi, s); +#elif defined(SIMD_AVX) + r.simd = _mm256_mul_pd(a.simd, _mm256_set1_pd(s)); +#else + r.x = a.x * s; r.y = a.y * s; r.z = a.z * s; r.w = a.w * s; +#endif + return r; +} + +inline static F64x4 +operator*(F64 s, const F64x4 &a) { return a * s; } + +inline static F64x4 & +operator*=(F64x4 &a, F64 s) { a = a * s; return a; } + +inline static F64x4 +operator/(const F64x4 &a, F64 s) { return a * (1.0 / s); } + +inline static F64x4 & +operator/=(F64x4 &a, F64 s) { a = a / s; return a; } + +inline static bool +operator==(const F64x4 &a, const F64x4 &b) { return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w; } + +// ---- Free functions -------------------------------------------------------- + +inline static F64x4 +f64x4_from_f64(F64 s) +{ + F64x4 r; +#if defined(SIMD_NEON) + r.simd.lo = vdupq_n_f64(s); + r.simd.hi = vdupq_n_f64(s); +#elif defined(SIMD_AVX) + r.simd = _mm256_set1_pd(s); +#else + r.x = s; r.y = s; r.z = s; r.w = s; +#endif + return r; +} + +inline static F64 +f64x4_dot(const F64x4 &a, const F64x4 &b) +{ +#if defined(SIMD_NEON) + return vaddvq_f64(vmulq_f64(a.simd.lo, b.simd.lo)) + + vaddvq_f64(vmulq_f64(a.simd.hi, b.simd.hi)); +#elif defined(SIMD_AVX) + __m256d m = _mm256_mul_pd(a.simd, b.simd); + __m128d low = _mm256_castpd256_pd128(m); + __m128d hi = _mm256_extractf128_pd(m, 1); + __m128d sum = _mm_add_pd(low, hi); + return _mm_cvtsd_f64(_mm_add_sd(sum, _mm_unpackhi_pd(sum, sum))); +#else + return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; +#endif +} + +inline static F64 +f64x4_length_squared(const F64x4 &a) { return f64x4_dot(a, a); } + +inline static F64 +f64x4_length(const F64x4 &a) { return f64_sqrt(f64x4_length_squared(a)); } + +inline static F64x4 +f64x4_normalize(const F64x4 &a) { return a / f64x4_length(a); } + +inline static F64x4 +f64x4_min(const F64x4 &a, const F64x4 &b) +{ + F64x4 r; +#if 
defined(SIMD_NEON) + r.simd.lo = vminq_f64(a.simd.lo, b.simd.lo); + r.simd.hi = vminq_f64(a.simd.hi, b.simd.hi); +#elif defined(SIMD_AVX) + r.simd = _mm256_min_pd(a.simd, b.simd); +#else + r.x = f64_min(a.x, b.x); r.y = f64_min(a.y, b.y); + r.z = f64_min(a.z, b.z); r.w = f64_min(a.w, b.w); +#endif + return r; +} + +inline static F64x4 +f64x4_max(const F64x4 &a, const F64x4 &b) +{ + F64x4 r; +#if defined(SIMD_NEON) + r.simd.lo = vmaxq_f64(a.simd.lo, b.simd.lo); + r.simd.hi = vmaxq_f64(a.simd.hi, b.simd.hi); +#elif defined(SIMD_AVX) + r.simd = _mm256_max_pd(a.simd, b.simd); +#else + r.x = f64_max(a.x, b.x); r.y = f64_max(a.y, b.y); + r.z = f64_max(a.z, b.z); r.w = f64_max(a.w, b.w); +#endif + return r; +} + +inline static F64x4 +f64x4_lerp(const F64x4 &a, const F64x4 &b, F64 t) { return a + (b - a) * t; } + +inline static F64x4 +f64x4_clamp(const F64x4 &v, const F64x4 &lo, const F64x4 &hi) { return f64x4_min(f64x4_max(v, lo), hi); } + +inline static bool +f64x4_approx_equal(const F64x4 &a, const F64x4 &b, F64 epsilon) +{ + return f64_approx_equal(a.x, b.x, epsilon) + && f64_approx_equal(a.y, b.y, epsilon) + && f64_approx_equal(a.z, b.z, epsilon) + && f64_approx_equal(a.w, b.w, epsilon); +} + +// ---- Constants ------------------------------------------------------------- + +static constexpr F64x4 F64X4_ZERO = {0.0, 0.0, 0.0, 0.0}; +static constexpr F64x4 F64X4_ONE = {1.0, 1.0, 1.0, 1.0}; diff --git a/core/math/f64x4x4.h b/core/math/f64x4x4.h new file mode 100644 index 00000000..cab3eb22 --- /dev/null +++ b/core/math/f64x4x4.h @@ -0,0 +1,207 @@ +#pragma once + +#include +#include +#include +#include + +// ============================================================================ +// F64x4x4 — 4x4 F64 matrix, row-major, SIMD-backed. +// 4 rows × F64x4 = 128 bytes. Same conventions as F32x4x4. 
+// ============================================================================ + +struct alignas(32) F64x4x4 +{ + union + { + struct + { + F64 m00, m01, m02, m03; + F64 m10, m11, m12, m13; + F64 m20, m21, m22, m23; + F64 m30, m31, m32, m33; + }; + F64x4 rows[4]; + }; +}; + +inline static const F64 & +f64x4x4_at(const F64x4x4 &M, I32 i) { return *((const F64 *)&M + i); } + +inline static F64 & +f64x4x4_at(F64x4x4 &M, I32 i) { return *((F64 *)&M + i); } + +inline static F64x4x4 +f64x4x4_identity() +{ + return F64x4x4{ + 1.0, 0.0, 0.0, 0.0, + 0.0, 1.0, 0.0, 0.0, + 0.0, 0.0, 1.0, 0.0, + 0.0, 0.0, 0.0, 1.0 + }; +} + +// ---- Element-wise operators ------------------------------------------------ + +inline static F64x4x4 +operator+(const F64x4x4 &A, const F64x4x4 &B) +{ + F64x4x4 R; + R.rows[0] = A.rows[0] + B.rows[0]; + R.rows[1] = A.rows[1] + B.rows[1]; + R.rows[2] = A.rows[2] + B.rows[2]; + R.rows[3] = A.rows[3] + B.rows[3]; + return R; +} + +inline static F64x4x4 +operator-(const F64x4x4 &M) +{ + F64x4x4 R; + R.rows[0] = -M.rows[0]; + R.rows[1] = -M.rows[1]; + R.rows[2] = -M.rows[2]; + R.rows[3] = -M.rows[3]; + return R; +} + +inline static F64x4x4 +operator-(const F64x4x4 &A, const F64x4x4 &B) +{ + F64x4x4 R; + R.rows[0] = A.rows[0] - B.rows[0]; + R.rows[1] = A.rows[1] - B.rows[1]; + R.rows[2] = A.rows[2] - B.rows[2]; + R.rows[3] = A.rows[3] - B.rows[3]; + return R; +} + +inline static F64x4x4 +operator*(const F64x4x4 &M, F64 s) +{ + F64x4x4 R; + R.rows[0] = M.rows[0] * s; + R.rows[1] = M.rows[1] * s; + R.rows[2] = M.rows[2] * s; + R.rows[3] = M.rows[3] * s; + return R; +} + +inline static F64x4x4 operator*(F64 s, const F64x4x4 &M) { return M * s; } +inline static F64x4x4 operator/(const F64x4x4 &M, F64 s) { return M * (1.0 / s); } + +inline static bool +operator==(const F64x4x4 &A, const F64x4x4 &B) +{ + return A.rows[0] == B.rows[0] && A.rows[1] == B.rows[1] + && A.rows[2] == B.rows[2] && A.rows[3] == B.rows[3]; +} + +// ---- Vec-mat / mat-mat multiply 
(row-vector) ------------------------------- + +inline static F64x4 +operator*(const F64x4 &v, const F64x4x4 &M) +{ + return v.x * M.rows[0] + v.y * M.rows[1] + v.z * M.rows[2] + v.w * M.rows[3]; +} + +inline static F64x4x4 +operator*(const F64x4x4 &A, const F64x4x4 &B) +{ + F64x4x4 R; + R.rows[0] = A.rows[0] * B; + R.rows[1] = A.rows[1] * B; + R.rows[2] = A.rows[2] * B; + R.rows[3] = A.rows[3] * B; + return R; +} + +// ---- Transpose / determinant / inverse ------------------------------------- + +inline static F64x4x4 +f64x4x4_transpose(const F64x4x4 &M) +{ + return F64x4x4{ + M.m00, M.m10, M.m20, M.m30, + M.m01, M.m11, M.m21, M.m31, + M.m02, M.m12, M.m22, M.m32, + M.m03, M.m13, M.m23, M.m33 + }; +} + +inline static F64 +f64x4x4_determinant(const F64x4x4 &M) +{ + return (M.m00 * M.m11 - M.m01 * M.m10) * (M.m22 * M.m33 - M.m23 * M.m32) + - (M.m00 * M.m12 - M.m02 * M.m10) * (M.m21 * M.m33 - M.m23 * M.m31) + + (M.m00 * M.m13 - M.m03 * M.m10) * (M.m21 * M.m32 - M.m22 * M.m31) + + (M.m01 * M.m12 - M.m02 * M.m11) * (M.m20 * M.m33 - M.m23 * M.m30) + - (M.m01 * M.m13 - M.m03 * M.m11) * (M.m20 * M.m32 - M.m22 * M.m30) + + (M.m02 * M.m13 - M.m03 * M.m12) * (M.m20 * M.m31 - M.m21 * M.m30); +} + +inline static bool +f64x4x4_is_invertible(const F64x4x4 &M) { return f64x4x4_determinant(M) != 0.0; } + +inline static F64x4x4 +f64x4x4_inverse(const F64x4x4 &M) +{ + F64 d = f64x4x4_determinant(M); + if (d == 0.0) + return F64x4x4{}; + + F64x4x4 adj = F64x4x4{ + + M.m11 * (M.m22 * M.m33 - M.m23 * M.m32) - M.m12 * (M.m21 * M.m33 - M.m23 * M.m31) + M.m13 * (M.m21 * M.m32 - M.m22 * M.m31), + - M.m01 * (M.m22 * M.m33 - M.m23 * M.m32) + M.m02 * (M.m21 * M.m33 - M.m23 * M.m31) - M.m03 * (M.m21 * M.m32 - M.m22 * M.m31), + + M.m01 * (M.m12 * M.m33 - M.m13 * M.m32) - M.m02 * (M.m11 * M.m33 - M.m13 * M.m31) + M.m03 * (M.m11 * M.m32 - M.m12 * M.m31), + - M.m01 * (M.m12 * M.m23 - M.m13 * M.m22) + M.m02 * (M.m11 * M.m23 - M.m13 * M.m21) - M.m03 * (M.m11 * M.m22 - M.m12 * M.m21), + + - M.m10 * 
(M.m22 * M.m33 - M.m23 * M.m32) + M.m12 * (M.m20 * M.m33 - M.m23 * M.m30) - M.m13 * (M.m20 * M.m32 - M.m22 * M.m30), + + M.m00 * (M.m22 * M.m33 - M.m23 * M.m32) - M.m02 * (M.m20 * M.m33 - M.m23 * M.m30) + M.m03 * (M.m20 * M.m32 - M.m22 * M.m30), + - M.m00 * (M.m12 * M.m33 - M.m13 * M.m32) + M.m02 * (M.m10 * M.m33 - M.m13 * M.m30) - M.m03 * (M.m10 * M.m32 - M.m12 * M.m30), + + M.m00 * (M.m12 * M.m23 - M.m13 * M.m22) - M.m02 * (M.m10 * M.m23 - M.m13 * M.m20) + M.m03 * (M.m10 * M.m22 - M.m12 * M.m20), + + + M.m10 * (M.m21 * M.m33 - M.m23 * M.m31) - M.m11 * (M.m20 * M.m33 - M.m23 * M.m30) + M.m13 * (M.m20 * M.m31 - M.m21 * M.m30), + - M.m00 * (M.m21 * M.m33 - M.m23 * M.m31) + M.m01 * (M.m20 * M.m33 - M.m23 * M.m30) - M.m03 * (M.m20 * M.m31 - M.m21 * M.m30), + + M.m00 * (M.m11 * M.m33 - M.m13 * M.m31) - M.m01 * (M.m10 * M.m33 - M.m13 * M.m30) + M.m03 * (M.m10 * M.m31 - M.m11 * M.m30), + - M.m00 * (M.m11 * M.m23 - M.m13 * M.m21) + M.m01 * (M.m10 * M.m23 - M.m13 * M.m20) - M.m03 * (M.m10 * M.m21 - M.m11 * M.m20), + + - M.m10 * (M.m21 * M.m32 - M.m22 * M.m31) + M.m11 * (M.m20 * M.m32 - M.m22 * M.m30) - M.m12 * (M.m20 * M.m31 - M.m21 * M.m30), + + M.m00 * (M.m21 * M.m32 - M.m22 * M.m31) - M.m01 * (M.m20 * M.m32 - M.m22 * M.m30) + M.m02 * (M.m20 * M.m31 - M.m21 * M.m30), + - M.m00 * (M.m11 * M.m32 - M.m12 * M.m31) + M.m01 * (M.m10 * M.m32 - M.m12 * M.m30) - M.m02 * (M.m10 * M.m31 - M.m11 * M.m30), + + M.m00 * (M.m11 * M.m22 - M.m12 * M.m21) - M.m01 * (M.m10 * M.m22 - M.m12 * M.m20) + M.m02 * (M.m10 * M.m21 - M.m11 * M.m20) + }; + + return adj * (1.0 / d); +} + +// ---- TRS builders ---------------------------------------------------------- + +inline static F64x4x4 +f64x4x4_translation(F64 dx, F64 dy, F64 dz) +{ + return F64x4x4{ + 1.0, 0.0, 0.0, 0.0, + 0.0, 1.0, 0.0, 0.0, + 0.0, 0.0, 1.0, 0.0, + dx, dy, dz, 1.0 + }; +} + +inline static F64x4x4 +f64x4x4_translation(const F64x3 &t) { return f64x4x4_translation(t.x, t.y, t.z); } + +inline static F64x4x4 +f64x4x4_scaling(F64 sx, 
F64 sy, F64 sz) +{ + return F64x4x4{ + sx, 0.0, 0.0, 0.0, + 0.0, sy, 0.0, 0.0, + 0.0, 0.0, sz, 0.0, + 0.0, 0.0, 0.0, 1.0 + }; +} + +inline static F64x4x4 +f64x4x4_scaling(const F64x3 &s) { return f64x4x4_scaling(s.x, s.y, s.z); } diff --git a/core/math/i32.h b/core/math/i32.h new file mode 100644 index 00000000..454e1346 --- /dev/null +++ b/core/math/i32.h @@ -0,0 +1,41 @@ +#pragma once + +#include + +// ============================================================================ +// I32 scalar helpers. I32_MIN / I32_MAX live in core/defines.h. +// ============================================================================ + +inline static I32 +i32_abs(I32 x) +{ + return x < 0 ? -x : x; +} + +inline static I32 +i32_sign(I32 x) +{ + if (x > 0) return 1; + if (x < 0) return -1; + return 0; +} + +inline static I32 +i32_min(I32 a, I32 b) +{ + return a < b ? a : b; +} + +inline static I32 +i32_max(I32 a, I32 b) +{ + return a > b ? a : b; +} + +inline static I32 +i32_clamp(I32 x, I32 lo, I32 hi) +{ + if (x < lo) return lo; + if (x > hi) return hi; + return x; +} diff --git a/core/math/i32x2.h b/core/math/i32x2.h new file mode 100644 index 00000000..5b7da14c --- /dev/null +++ b/core/math/i32x2.h @@ -0,0 +1,33 @@ +#pragma once + +#include +#include + +// ============================================================================ +// I32x2 — 2D I32 vector (scalar). 
+// ============================================================================ + +struct I32x2 +{ + I32 x, y; +}; + +inline static I32x2 operator+(const I32x2 &a, const I32x2 &b) { return I32x2{a.x + b.x, a.y + b.y}; } +inline static I32x2 &operator+=(I32x2 &a, const I32x2 &b) { a = a + b; return a; } +inline static I32x2 operator-(const I32x2 &a) { return I32x2{-a.x, -a.y}; } +inline static I32x2 operator-(const I32x2 &a, const I32x2 &b) { return I32x2{a.x - b.x, a.y - b.y}; } +inline static I32x2 &operator-=(I32x2 &a, const I32x2 &b) { a = a - b; return a; } +inline static I32x2 operator*(const I32x2 &a, I32 s) { return I32x2{a.x * s, a.y * s}; } +inline static I32x2 operator*(I32 s, const I32x2 &a) { return a * s; } +inline static I32x2 &operator*=(I32x2 &a, I32 s) { a = a * s; return a; } +inline static bool operator==(const I32x2 &a, const I32x2 &b) { return a.x == b.x && a.y == b.y; } + +inline static I32 i32x2_dot(const I32x2 &a, const I32x2 &b) { return a.x * b.x + a.y * b.y; } +inline static I32 i32x2_length_squared(const I32x2 &a) { return i32x2_dot(a, a); } +inline static I32x2 i32x2_abs(const I32x2 &a) { return I32x2{i32_abs(a.x), i32_abs(a.y)}; } +inline static I32x2 i32x2_min(const I32x2 &a, const I32x2 &b) { return I32x2{i32_min(a.x, b.x), i32_min(a.y, b.y)}; } +inline static I32x2 i32x2_max(const I32x2 &a, const I32x2 &b) { return I32x2{i32_max(a.x, b.x), i32_max(a.y, b.y)}; } +inline static I32x2 i32x2_clamp(const I32x2 &v, const I32x2 &lo, const I32x2 &hi) { return i32x2_min(i32x2_max(v, lo), hi); } + +static constexpr I32x2 I32X2_ZERO = {0, 0}; +static constexpr I32x2 I32X2_ONE = {1, 1}; diff --git a/core/math/i32x3.h b/core/math/i32x3.h new file mode 100644 index 00000000..1c194397 --- /dev/null +++ b/core/math/i32x3.h @@ -0,0 +1,33 @@ +#pragma once + +#include +#include + +// ============================================================================ +// I32x3 — 3D I32 vector (scalar, packed). 
+// ============================================================================ + +struct I32x3 +{ + I32 x, y, z; +}; + +inline static I32x3 operator+(const I32x3 &a, const I32x3 &b) { return I32x3{a.x + b.x, a.y + b.y, a.z + b.z}; } +inline static I32x3 &operator+=(I32x3 &a, const I32x3 &b) { a = a + b; return a; } +inline static I32x3 operator-(const I32x3 &a) { return I32x3{-a.x, -a.y, -a.z}; } +inline static I32x3 operator-(const I32x3 &a, const I32x3 &b) { return I32x3{a.x - b.x, a.y - b.y, a.z - b.z}; } +inline static I32x3 &operator-=(I32x3 &a, const I32x3 &b) { a = a - b; return a; } +inline static I32x3 operator*(const I32x3 &a, I32 s) { return I32x3{a.x * s, a.y * s, a.z * s}; } +inline static I32x3 operator*(I32 s, const I32x3 &a) { return a * s; } +inline static I32x3 &operator*=(I32x3 &a, I32 s) { a = a * s; return a; } +inline static bool operator==(const I32x3 &a, const I32x3 &b) { return a.x == b.x && a.y == b.y && a.z == b.z; } + +inline static I32 i32x3_dot(const I32x3 &a, const I32x3 &b) { return a.x * b.x + a.y * b.y + a.z * b.z; } +inline static I32 i32x3_length_squared(const I32x3 &a) { return i32x3_dot(a, a); } +inline static I32x3 i32x3_abs(const I32x3 &a) { return I32x3{i32_abs(a.x), i32_abs(a.y), i32_abs(a.z)}; } +inline static I32x3 i32x3_min(const I32x3 &a, const I32x3 &b) { return I32x3{i32_min(a.x, b.x), i32_min(a.y, b.y), i32_min(a.z, b.z)}; } +inline static I32x3 i32x3_max(const I32x3 &a, const I32x3 &b) { return I32x3{i32_max(a.x, b.x), i32_max(a.y, b.y), i32_max(a.z, b.z)}; } +inline static I32x3 i32x3_clamp(const I32x3 &v, const I32x3 &lo, const I32x3 &hi) { return i32x3_min(i32x3_max(v, lo), hi); } + +static constexpr I32x3 I32X3_ZERO = {0, 0, 0}; +static constexpr I32x3 I32X3_ONE = {1, 1, 1}; diff --git a/core/math/i32x4.h b/core/math/i32x4.h new file mode 100644 index 00000000..e80a0973 --- /dev/null +++ b/core/math/i32x4.h @@ -0,0 +1,187 @@ +#pragma once + +#include +#include + +// 
============================================================================ +// I32x4 — 4D I32 vector, SIMD-backed. Useful for SIMD masks, packed flags, +// cluster indexing, batched integer arithmetic. +// ============================================================================ + +#if defined(SIMD_FORCE_SCALAR) + struct Simd_I32x4 { I32 v[4]; }; +#elif defined(SIMD_NEON) + #include + typedef int32x4_t Simd_I32x4; +#elif defined(SIMD_AVX) + #include + typedef __m128i Simd_I32x4; +#else + struct Simd_I32x4 { I32 v[4]; }; +#endif + +struct alignas(16) I32x4 +{ + union + { + struct { I32 x, y, z, w; }; + Simd_I32x4 simd; + }; +}; + +// ---- Operators ------------------------------------------------------------- + +inline static I32x4 +operator+(const I32x4 &a, const I32x4 &b) +{ + I32x4 r; +#if defined(SIMD_NEON) + r.simd = vaddq_s32(a.simd, b.simd); +#elif defined(SIMD_AVX) + r.simd = _mm_add_epi32(a.simd, b.simd); +#else + r.x = a.x + b.x; r.y = a.y + b.y; r.z = a.z + b.z; r.w = a.w + b.w; +#endif + return r; +} + +inline static I32x4 & +operator+=(I32x4 &a, const I32x4 &b) { a = a + b; return a; } + +inline static I32x4 +operator-(const I32x4 &a) +{ + I32x4 r; +#if defined(SIMD_NEON) + r.simd = vnegq_s32(a.simd); +#elif defined(SIMD_AVX) + r.simd = _mm_sub_epi32(_mm_setzero_si128(), a.simd); +#else + r.x = -a.x; r.y = -a.y; r.z = -a.z; r.w = -a.w; +#endif + return r; +} + +inline static I32x4 +operator-(const I32x4 &a, const I32x4 &b) +{ + I32x4 r; +#if defined(SIMD_NEON) + r.simd = vsubq_s32(a.simd, b.simd); +#elif defined(SIMD_AVX) + r.simd = _mm_sub_epi32(a.simd, b.simd); +#else + r.x = a.x - b.x; r.y = a.y - b.y; r.z = a.z - b.z; r.w = a.w - b.w; +#endif + return r; +} + +inline static I32x4 & +operator-=(I32x4 &a, const I32x4 &b) { a = a - b; return a; } + +inline static I32x4 +operator*(const I32x4 &a, I32 s) +{ + I32x4 r; +#if defined(SIMD_NEON) + r.simd = vmulq_n_s32(a.simd, s); +#elif defined(SIMD_AVX) + // _mm_mullo_epi32 is SSE4.1; guaranteed under 
AVX baseline. + r.simd = _mm_mullo_epi32(a.simd, _mm_set1_epi32(s)); +#else + r.x = a.x * s; r.y = a.y * s; r.z = a.z * s; r.w = a.w * s; +#endif + return r; +} + +inline static I32x4 +operator*(I32 s, const I32x4 &a) { return a * s; } + +inline static I32x4 & +operator*=(I32x4 &a, I32 s) { a = a * s; return a; } + +inline static bool +operator==(const I32x4 &a, const I32x4 &b) +{ + return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w; +} + +// ---- Free functions -------------------------------------------------------- + +inline static I32x4 +i32x4_from_i32(I32 s) +{ + I32x4 r; +#if defined(SIMD_NEON) + r.simd = vdupq_n_s32(s); +#elif defined(SIMD_AVX) + r.simd = _mm_set1_epi32(s); +#else + r.x = s; r.y = s; r.z = s; r.w = s; +#endif + return r; +} + +inline static I32 +i32x4_dot(const I32x4 &a, const I32x4 &b) +{ + return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; +} + +inline static I32 +i32x4_length_squared(const I32x4 &a) { return i32x4_dot(a, a); } + +inline static I32x4 +i32x4_abs(const I32x4 &a) +{ + I32x4 r; +#if defined(SIMD_NEON) + r.simd = vabsq_s32(a.simd); +#elif defined(SIMD_AVX) + r.simd = _mm_abs_epi32(a.simd); // SSSE3+ +#else + r.x = i32_abs(a.x); r.y = i32_abs(a.y); r.z = i32_abs(a.z); r.w = i32_abs(a.w); +#endif + return r; +} + +inline static I32x4 +i32x4_min(const I32x4 &a, const I32x4 &b) +{ + I32x4 r; +#if defined(SIMD_NEON) + r.simd = vminq_s32(a.simd, b.simd); +#elif defined(SIMD_AVX) + r.simd = _mm_min_epi32(a.simd, b.simd); // SSE4.1+ +#else + r.x = i32_min(a.x, b.x); r.y = i32_min(a.y, b.y); + r.z = i32_min(a.z, b.z); r.w = i32_min(a.w, b.w); +#endif + return r; +} + +inline static I32x4 +i32x4_max(const I32x4 &a, const I32x4 &b) +{ + I32x4 r; +#if defined(SIMD_NEON) + r.simd = vmaxq_s32(a.simd, b.simd); +#elif defined(SIMD_AVX) + r.simd = _mm_max_epi32(a.simd, b.simd); // SSE4.1+ +#else + r.x = i32_max(a.x, b.x); r.y = i32_max(a.y, b.y); + r.z = i32_max(a.z, b.z); r.w = i32_max(a.w, b.w); +#endif + return r; +} + +inline 
static I32x4 +i32x4_clamp(const I32x4 &v, const I32x4 &lo, const I32x4 &hi) +{ + return i32x4_min(i32x4_max(v, lo), hi); +} + +// ---- Constants ------------------------------------------------------------- + +static constexpr I32x4 I32X4_ZERO = {0, 0, 0, 0}; +static constexpr I32x4 I32X4_ONE = {1, 1, 1, 1}; diff --git a/core/math/i64.h b/core/math/i64.h new file mode 100644 index 00000000..9117408f --- /dev/null +++ b/core/math/i64.h @@ -0,0 +1,41 @@ +#pragma once + +#include + +// ============================================================================ +// I64 scalar helpers. I64_MIN / I64_MAX live in core/defines.h. +// ============================================================================ + +inline static I64 +i64_abs(I64 x) +{ + return x < 0 ? -x : x; +} + +inline static I64 +i64_sign(I64 x) +{ + if (x > 0) return 1; + if (x < 0) return -1; + return 0; +} + +inline static I64 +i64_min(I64 a, I64 b) +{ + return a < b ? a : b; +} + +inline static I64 +i64_max(I64 a, I64 b) +{ + return a > b ? a : b; +} + +inline static I64 +i64_clamp(I64 x, I64 lo, I64 hi) +{ + if (x < lo) return lo; + if (x > hi) return hi; + return x; +} diff --git a/core/math/quaternion.h b/core/math/quaternion.h new file mode 100644 index 00000000..3aa744aa --- /dev/null +++ b/core/math/quaternion.h @@ -0,0 +1,409 @@ +#pragma once + +#include +#include +#include +#include + +// ============================================================================ +// Quaternion — unit-quaternion rotation representation. +// +// Storage order is (w, x, y, z). Unit quaternions satisfy w² + x² + y² + z² = 1 +// and represent a rotation of angle 2·acos(w) about axis (x, y, z)/sin(acos(w)). +// +// Composition follows the engine's row-vector convention: rotating a vector by +// two quaternions in sequence is written `v * q1 * q2` on the CPU — `q1` +// applied first, `q2` applied second. Quaternion multiplication is non-commutative. 
+// ============================================================================ + +struct Quaternion +{ + F32 w, x, y, z; +}; + +// ---- Operators ------------------------------------------------------------- + +inline static Quaternion +operator+(const Quaternion &p, const Quaternion &q) +{ + return Quaternion{p.w + q.w, p.x + q.x, p.y + q.y, p.z + q.z}; +} + +inline static Quaternion & +operator+=(Quaternion &p, const Quaternion &q) { p = p + q; return p; } + +inline static Quaternion +operator-(const Quaternion &q) { return Quaternion{-q.w, -q.x, -q.y, -q.z}; } + +inline static Quaternion +operator-(const Quaternion &p, const Quaternion &q) +{ + return Quaternion{p.w - q.w, p.x - q.x, p.y - q.y, p.z - q.z}; +} + +inline static Quaternion & +operator-=(Quaternion &p, const Quaternion &q) { p = p - q; return p; } + +inline static Quaternion +operator*(const Quaternion &q, F32 s) +{ + return Quaternion{q.w * s, q.x * s, q.y * s, q.z * s}; +} + +inline static Quaternion +operator*(F32 s, const Quaternion &q) { return q * s; } + +inline static Quaternion & +operator*=(Quaternion &q, F32 s) { q = q * s; return q; } + +inline static Quaternion +operator/(const Quaternion &q, F32 s) { return q * (1.0f / s); } + +inline static Quaternion & +operator/=(Quaternion &q, F32 s) { q = q / s; return q; } + +// Quaternion product — non-commutative; composition semantics match row-vector +// convention (see header comment). 
+inline static Quaternion +operator*(const Quaternion &p, const Quaternion &q) +{ + return Quaternion{ + p.w * q.w - p.x * q.x - p.y * q.y - p.z * q.z, + p.w * q.x + p.x * q.w + p.y * q.z - p.z * q.y, + p.w * q.y - p.x * q.z + p.y * q.w + p.z * q.x, + p.w * q.z + p.x * q.y - p.y * q.x + p.z * q.w + }; +} + +inline static Quaternion & +operator*=(Quaternion &p, const Quaternion &q) { p = p * q; return p; } + +inline static bool +operator==(const Quaternion &p, const Quaternion &q) +{ + return p.w == q.w && p.x == q.x && p.y == q.y && p.z == q.z; +} + +// ---- Rotate a vector by a quaternion (v' = v * q) -------------------------- +// Uses the standard formula v' = v + 2 * q.xyz × (q.xyz × v + q.w * v). + +inline static F32x3 +operator*(const F32x3 &v, const Quaternion &q) +{ + F32x3 u = {q.x, q.y, q.z}; + F32x3 t = 2.0f * f32x3_cross(u, v); + return v + q.w * t + f32x3_cross(u, t); +} + +inline static F32x3 & +operator*=(F32x3 &v, const Quaternion &q) { v = v * q; return v; } + +// ---- Construction ---------------------------------------------------------- + +inline static Quaternion +quaternion_identity() { return Quaternion{1.0f, 0.0f, 0.0f, 0.0f}; } + +inline static Quaternion +quaternion_from_axis_angle(const F32x3 &axis, F32 angle_in_radians) +{ + F32 half = angle_in_radians * 0.5f; + F32 s = f32_sin(half); + return Quaternion{f32_cos(half), s * axis.x, s * axis.y, s * axis.z}; +} + +// Right-handed XYZ-intrinsic Euler → Quaternion. Angles are in radians. +// Composition: rotate about X first, then Y, then Z (roll last). No -z hack. +// Fixes the legacy `quat_from_angles` which was documented as left-handed and +// compensated the z imaginary with a negation (see plan). 
+inline static Quaternion +quaternion_from_euler_angles(const F32x3 &angles_in_radians) +{ + F32 cx = f32_cos(angles_in_radians.x * 0.5f); + F32 sx = f32_sin(angles_in_radians.x * 0.5f); + F32 cy = f32_cos(angles_in_radians.y * 0.5f); + F32 sy = f32_sin(angles_in_radians.y * 0.5f); + F32 cz = f32_cos(angles_in_radians.z * 0.5f); + F32 sz = f32_sin(angles_in_radians.z * 0.5f); + + return Quaternion{ + cx * cy * cz + sx * sy * sz, // w + sx * cy * cz - cx * sy * sz, // x + cx * sy * cz + sx * cy * sz, // y + cx * cy * sz - sx * sy * cz // z + }; +} + +// Extract XYZ-intrinsic Euler angles (radians) from a unit quaternion. Handles +// gimbal-lock at ±90° pitch by clamping asin. +inline static F32x3 +quaternion_to_euler_angles(const Quaternion &q) +{ + F32x3 r; + r.x = f32_atan2(2.0f * (q.w * q.x + q.y * q.z), 1.0f - 2.0f * (q.x * q.x + q.y * q.y)); + + F32 siny = 2.0f * (q.w * q.y - q.z * q.x); + if (f32_abs(siny) >= 1.0f) + r.y = f32_sign(siny) * F32_PI_OVER_2; + else + r.y = f32_asin(siny); + + r.z = f32_atan2(2.0f * (q.w * q.z + q.x * q.y), 1.0f - 2.0f * (q.y * q.y + q.z * q.z)); + return r; +} + +// ---- Magnitude / normalization -------------------------------------------- + +inline static F32 +quaternion_dot(const Quaternion &p, const Quaternion &q) +{ + return p.w * q.w + p.x * q.x + p.y * q.y + p.z * q.z; +} + +inline static F32 +quaternion_length_squared(const Quaternion &q) { return quaternion_dot(q, q); } + +inline static F32 +quaternion_length(const Quaternion &q) { return f32_sqrt(quaternion_length_squared(q)); } + +inline static Quaternion +quaternion_normalize(const Quaternion &q) { return q / quaternion_length(q); } + +inline static Quaternion +quaternion_inverse(const Quaternion &q) +{ + // For unit quaternions this is the conjugate; we divide by ||q||² to stay correct for non-unit. 
+ return Quaternion{q.w, -q.x, -q.y, -q.z} / quaternion_length_squared(q); +} + +// ---- SLERP ----------------------------------------------------------------- +// Shortest-path spherical linear interpolation. Falls back to nlerp near t=0. + +inline static Quaternion +quaternion_slerp(Quaternion a, Quaternion b, F32 t) +{ + F32 dot = quaternion_dot(a, b); + + // If the dot is negative, negate one endpoint to take the shorter arc. + if (dot < 0.0f) + { + b = -b; + dot = -dot; + } + + // If the inputs are nearly parallel, linearly interpolate and re-normalize. + constexpr F32 SLERP_LINEAR_THRESHOLD = 0.9995f; + if (dot > SLERP_LINEAR_THRESHOLD) + { + Quaternion r = { + a.w + t * (b.w - a.w), + a.x + t * (b.x - a.x), + a.y + t * (b.y - a.y), + a.z + t * (b.z - a.z) + }; + return quaternion_normalize(r); + } + + F32 theta_0 = f32_acos(dot); + F32 theta = theta_0 * t; + F32 sin_theta = f32_sin(theta); + F32 sin_0 = f32_sin(theta_0); + + F32 s0 = f32_cos(theta) - dot * sin_theta / sin_0; + F32 s1 = sin_theta / sin_0; + + return Quaternion{ + s0 * a.w + s1 * b.w, + s0 * a.x + s1 * b.x, + s0 * a.y + s1 * b.y, + s0 * a.z + s1 * b.z + }; +} + +inline static bool +quaternion_approx_equal(const Quaternion &a, const Quaternion &b, F32 epsilon) +{ + return f32_approx_equal(a.w, b.w, epsilon) + && f32_approx_equal(a.x, b.x, epsilon) + && f32_approx_equal(a.y, b.y, epsilon) + && f32_approx_equal(a.z, b.z, epsilon); +} + +// ---- Staples (new in v1) --------------------------------------------------- + +// Shortest-arc rotation mapping `from` to `to`. Both vectors need not be unit — +// the function normalizes internally. +inline static Quaternion +quaternion_from_to_rotation(const F32x3 &from, const F32x3 &to) +{ + F32x3 u = f32x3_normalize(from); + F32x3 v = f32x3_normalize(to); + F32 d = f32x3_dot(u, v); + + // Anti-parallel: pick any axis perpendicular to u. 
+ if (d < -0.9999f) + { + F32x3 axis = f32x3_cross(F32X3_RIGHT, u); + if (f32x3_length_squared(axis) < 1e-6f) + axis = f32x3_cross(F32X3_UP, u); + return quaternion_from_axis_angle(f32x3_normalize(axis), F32_PI); + } + + F32x3 c = f32x3_cross(u, v); + F32 s = f32_sqrt((1.0f + d) * 2.0f); + F32 inv_s = 1.0f / s; + return Quaternion{s * 0.5f, c.x * inv_s, c.y * inv_s, c.z * inv_s}; +} + +// Build an orientation whose local -Z points along `forward` and local +Y is +// aligned with `up` (Gram-Schmidt orthogonalization). Matches the camera +// convention from `f32x4x4_look_at` but returns a quaternion for object orientation. +inline static Quaternion +quaternion_look_rotation(const F32x3 &forward, const F32x3 &up) +{ + // Local -Z = forward (canonical convention: camera looks down -Z). + F32x3 axis_z = f32x3_normalize(-forward); + F32x3 axis_x = f32x3_normalize(f32x3_cross(up, axis_z)); + F32x3 axis_y = f32x3_cross(axis_z, axis_x); + + // Build quaternion from a basis via the standard rotation-matrix-to-quat + // algorithm applied to the 3x3 rotation matrix whose columns are (axis_x, + // axis_y, axis_z). 
+ F32 trace = axis_x.x + axis_y.y + axis_z.z; + Quaternion q; + if (trace > 0.0f) + { + F32 s = f32_sqrt(trace + 1.0f) * 2.0f; // s = 4 * w + q.w = 0.25f * s; + q.x = (axis_y.z - axis_z.y) / s; + q.y = (axis_z.x - axis_x.z) / s; + q.z = (axis_x.y - axis_y.x) / s; + } + else if (axis_x.x > axis_y.y && axis_x.x > axis_z.z) + { + F32 s = f32_sqrt(1.0f + axis_x.x - axis_y.y - axis_z.z) * 2.0f; + q.w = (axis_y.z - axis_z.y) / s; + q.x = 0.25f * s; + q.y = (axis_y.x + axis_x.y) / s; + q.z = (axis_z.x + axis_x.z) / s; + } + else if (axis_y.y > axis_z.z) + { + F32 s = f32_sqrt(1.0f + axis_y.y - axis_x.x - axis_z.z) * 2.0f; + q.w = (axis_z.x - axis_x.z) / s; + q.x = (axis_y.x + axis_x.y) / s; + q.y = 0.25f * s; + q.z = (axis_z.y + axis_y.z) / s; + } + else + { + F32 s = f32_sqrt(1.0f + axis_z.z - axis_x.x - axis_y.y) * 2.0f; + q.w = (axis_x.y - axis_y.x) / s; + q.x = (axis_z.x + axis_x.z) / s; + q.y = (axis_z.y + axis_y.z) / s; + q.z = 0.25f * s; + } + return quaternion_normalize(q); +} + +// Rotate `current` toward `target`, clamped so no single step exceeds +// `max_angle_radians`. Handy for smooth AI aim, camera follow, etc. +inline static Quaternion +quaternion_rotate_towards(const Quaternion &current, const Quaternion &target, F32 max_angle_radians) +{ + F32 dot = f32_clamp(quaternion_dot(current, target), -1.0f, 1.0f); + if (dot < 0.0f) + dot = -dot; + F32 angle = 2.0f * f32_acos(dot); + if (angle <= max_angle_radians || angle == 0.0f) + return target; + return quaternion_slerp(current, target, max_angle_radians / angle); +} + +// ---- Constants ------------------------------------------------------------- + +static constexpr Quaternion QUATERNION_IDENTITY = {1.0f, 0.0f, 0.0f, 0.0f}; + +// ============================================================================ +// F32x4x4 ↔ Quaternion conversions. Lives here to avoid circular include +// between f32x4x4.h and quaternion.h. 
+// ============================================================================ + +inline static F32x4x4 +f32x4x4_from_quaternion(const Quaternion &q_in) +{ + Quaternion q = quaternion_normalize(q_in); + F32 w = q.w, x = q.x, y = q.y, z = q.z; + + return F32x4x4{ + 1.0f - 2.0f * y * y - 2.0f * z * z, 2.0f * x * y + 2.0f * z * w, 2.0f * x * z - 2.0f * y * w, 0.0f, + 2.0f * x * y - 2.0f * z * w, 1.0f - 2.0f * x * x - 2.0f * z * z, 2.0f * y * z + 2.0f * x * w, 0.0f, + 2.0f * x * z + 2.0f * y * w, 2.0f * y * z - 2.0f * x * w, 1.0f - 2.0f * x * x - 2.0f * y * y, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + }; +} + +// Decompose a TRS (translation × rotation × scale) matrix built via the +// row-vector convention into its components. Returns false on degenerate input. +inline static bool +f32x4x4_decompose(const F32x4x4 &M, F32x3 *out_translation, Quaternion *out_rotation, F32x3 *out_scale) +{ + if (out_translation) + *out_translation = F32x3{M.m30, M.m31, M.m32}; + + F32x3 row0 = {M.m00, M.m01, M.m02}; + F32x3 row1 = {M.m10, M.m11, M.m12}; + F32x3 row2 = {M.m20, M.m21, M.m22}; + + F32 sx = f32x3_length(row0); + F32 sy = f32x3_length(row1); + F32 sz = f32x3_length(row2); + + if (sx < 1e-6f || sy < 1e-6f || sz < 1e-6f) + return false; + + if (out_scale) + *out_scale = F32x3{sx, sy, sz}; + + F32x3 rx = row0 / sx; + F32x3 ry = row1 / sy; + F32x3 rz = row2 / sz; + + if (out_rotation) + { + F32 trace = rx.x + ry.y + rz.z; + if (trace > 0.0f) + { + F32 s = f32_sqrt(trace + 1.0f) * 2.0f; + out_rotation->w = 0.25f * s; + out_rotation->x = (ry.z - rz.y) / s; + out_rotation->y = (rz.x - rx.z) / s; + out_rotation->z = (rx.y - ry.x) / s; + } + else if (rx.x > ry.y && rx.x > rz.z) + { + F32 s = f32_sqrt(1.0f + rx.x - ry.y - rz.z) * 2.0f; + out_rotation->w = (ry.z - rz.y) / s; + out_rotation->x = 0.25f * s; + out_rotation->y = (ry.x + rx.y) / s; + out_rotation->z = (rz.x + rx.z) / s; + } + else if (ry.y > rz.z) + { + F32 s = f32_sqrt(1.0f + ry.y - rx.x - rz.z) * 2.0f; + out_rotation->w = (rz.x - 
rx.z) / s; + out_rotation->x = (ry.x + rx.y) / s; + out_rotation->y = 0.25f * s; + out_rotation->z = (rz.y + ry.z) / s; + } + else + { + F32 s = f32_sqrt(1.0f + rz.z - rx.x - ry.y) * 2.0f; + out_rotation->w = (rx.y - ry.x) / s; + out_rotation->x = (rz.x + rx.z) / s; + out_rotation->y = (rz.y + ry.z) / s; + out_rotation->z = 0.25f * s; + } + *out_rotation = quaternion_normalize(*out_rotation); + } + return true; +} diff --git a/core/math/random.h b/core/math/random.h new file mode 100644 index 00000000..01ee2eb7 --- /dev/null +++ b/core/math/random.h @@ -0,0 +1,207 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include // For ::memcpy (bit-cast U32 → F32 in f32_random_unit). + +// ============================================================================ +// Random — explicit-state PRNG (xoshiro256**) + typed samplers. +// +// No hidden global state. Seeded explicitly for reproducibility (replays, +// networking, tests). Core pattern: +// +// Random rng = random_from_seed(0xdeadbeef); +// F32 angle = f32_random_range(rng, 0.0f, F32_TAU); +// +// xoshiro256** has 256-bit state, passes BigCrush, is faster than Mersenne +// Twister, and has a jump primitive for parallel streams (not exposed yet — +// revisit when needed). +// ============================================================================ + +struct Random +{ + U64 state[4]; +}; + +// SplitMix64 — expands a single 64-bit seed into four uncorrelated 64-bit +// words for the xoshiro state. From the xoshiro reference implementation. 
+inline static U64 +_random_splitmix64_next(U64 &s) +{ + U64 z = (s += 0x9E3779B97F4A7C15ULL); + z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9ULL; + z = (z ^ (z >> 27)) * 0x94D049BB133111EBULL; + return z ^ (z >> 31); +} + +inline static U64 +_random_rotate_left(U64 x, I32 k) { return (x << k) | (x >> (64 - k)); } + +inline static void +random_seed(Random &rng, U64 seed) +{ + U64 s = seed; + rng.state[0] = _random_splitmix64_next(s); + rng.state[1] = _random_splitmix64_next(s); + rng.state[2] = _random_splitmix64_next(s); + rng.state[3] = _random_splitmix64_next(s); +} + +// Convenience: construct and seed in one step. Useful for throwaway streams +// where you don't need to hold the `Random` across function boundaries. +inline static Random +random_from_seed(U64 seed) +{ + Random rng; + random_seed(rng, seed); + return rng; +} + +inline static U64 +random_u64(Random &rng) +{ + // xoshiro256** — reference implementation. + const U64 result = _random_rotate_left(rng.state[1] * 5, 7) * 9; + const U64 t = rng.state[1] << 17; + + rng.state[2] ^= rng.state[0]; + rng.state[3] ^= rng.state[1]; + rng.state[1] ^= rng.state[2]; + rng.state[0] ^= rng.state[3]; + rng.state[2] ^= t; + rng.state[3] = _random_rotate_left(rng.state[3], 45); + + return result; +} + +inline static U32 +random_u32(Random &rng) +{ + // High bits of xoshiro256** have best quality. + return (U32)(random_u64(rng) >> 32); +} + +// ---- Typed samplers (floats) ----------------------------------------------- +// Map the top 24 bits of a U32 into [0, 1) by interpreting as a float in +// [1.0, 2.0) and subtracting 1 — standard trick that avoids bias from modulo. 
+ +inline static F32 +f32_random_unit(Random &rng) +{ + U32 bits = (random_u32(rng) >> 8) | 0x3F800000u; + F32 f; + ::memcpy(&f, &bits, sizeof(f)); + return f - 1.0f; +} + +inline static F32 +f32_random_range(Random &rng, F32 min, F32 max) +{ + return min + f32_random_unit(rng) * (max - min); +} + +inline static F64 +f64_random_unit(Random &rng) +{ + // Use top 52 bits. + U64 bits = (random_u64(rng) >> 12) | 0x3FF0000000000000ULL; + F64 d; + ::memcpy(&d, &bits, sizeof(d)); + return d - 1.0; +} + +inline static F64 +f64_random_range(Random &rng, F64 min, F64 max) +{ + return min + f64_random_unit(rng) * (max - min); +} + +// ---- Typed samplers (integers) --------------------------------------------- + +inline static I32 +i32_random_range(Random &rng, I32 min, I32 max) +{ + // Inclusive: [min, max]. Uses U64 modulo which has negligible bias for + // typical small ranges. + U64 span = (U64)((I64)max - (I64)min + 1); + U64 value = random_u64(rng) % span; + return (I32)((I64)min + (I64)value); +} + +inline static U32 +u32_random_range(Random &rng, U32 min, U32 max) +{ + U64 span = (U64)max - (U64)min + 1; + U64 value = random_u64(rng) % span; + return (U32)(min + (U32)value); +} + +// ---- Typed samplers (geometric) -------------------------------------------- + +// Uniform point inside the unit disk (|p| ≤ 1). Rejection sampling — cheap +// and unbiased. +inline static F32x2 +f32x2_random_in_unit_disk(Random &rng) +{ + for (;;) + { + F32x2 p = { + f32_random_range(rng, -1.0f, 1.0f), + f32_random_range(rng, -1.0f, 1.0f) + }; + if (f32x2_length_squared(p) <= 1.0f) + return p; + } +} + +// Uniform point inside the unit sphere (|p| ≤ 1). +inline static F32x3 +f32x3_random_in_unit_sphere(Random &rng) +{ + for (;;) + { + F32x3 p = { + f32_random_range(rng, -1.0f, 1.0f), + f32_random_range(rng, -1.0f, 1.0f), + f32_random_range(rng, -1.0f, 1.0f) + }; + if (f32x3_length_squared(p) <= 1.0f) + return p; + } +} + +// Uniform point on the surface of the unit sphere (|p| = 1). 
Uses spherical +// coordinates with inverse-CDF sampling — exact, no rejection. +inline static F32x3 +f32x3_random_on_unit_sphere(Random &rng) +{ + F32 z = f32_random_range(rng, -1.0f, 1.0f); + F32 phi = f32_random_range(rng, 0.0f, F32_TAU); + F32 r = f32_sqrt(1.0f - z * z); + return F32x3{r * f32_cos(phi), r * f32_sin(phi), z}; +} + +// Uniform random rotation (Shoemake's method, 1992). Samples three uniform +// variables and combines them into a unit quaternion distributed uniformly on SO(3). +inline static Quaternion +quaternion_random(Random &rng) +{ + F32 u1 = f32_random_unit(rng); + F32 u2 = f32_random_range(rng, 0.0f, F32_TAU); + F32 u3 = f32_random_range(rng, 0.0f, F32_TAU); + + F32 a = f32_sqrt(1.0f - u1); + F32 b = f32_sqrt(u1); + + return Quaternion{ + a * f32_cos(u2), + a * f32_sin(u2), + b * f32_cos(u3), + b * f32_sin(u3) + }; +} diff --git a/core/math/u32.h b/core/math/u32.h new file mode 100644 index 00000000..6933b94a --- /dev/null +++ b/core/math/u32.h @@ -0,0 +1,27 @@ +#pragma once + +#include + +// ============================================================================ +// U32 scalar helpers. U32_MAX lives in core/defines.h. No abs/sign for unsigned. +// ============================================================================ + +inline static U32 +u32_min(U32 a, U32 b) +{ + return a < b ? a : b; +} + +inline static U32 +u32_max(U32 a, U32 b) +{ + return a > b ? a : b; +} + +inline static U32 +u32_clamp(U32 x, U32 lo, U32 hi) +{ + if (x < lo) return lo; + if (x > hi) return hi; + return x; +} diff --git a/core/math/u32x2.h b/core/math/u32x2.h new file mode 100644 index 00000000..210ab099 --- /dev/null +++ b/core/math/u32x2.h @@ -0,0 +1,31 @@ +#pragma once + +#include +#include + +// ============================================================================ +// U32x2 — 2D U32 vector (scalar). Commonly texture coords / grid indices. 
+// ============================================================================ + +struct U32x2 +{ + U32 x, y; +}; + +inline static U32x2 operator+(const U32x2 &a, const U32x2 &b) { return U32x2{a.x + b.x, a.y + b.y}; } +inline static U32x2 &operator+=(U32x2 &a, const U32x2 &b) { a = a + b; return a; } +inline static U32x2 operator-(const U32x2 &a, const U32x2 &b) { return U32x2{a.x - b.x, a.y - b.y}; } +inline static U32x2 &operator-=(U32x2 &a, const U32x2 &b) { a = a - b; return a; } +inline static U32x2 operator*(const U32x2 &a, U32 s) { return U32x2{a.x * s, a.y * s}; } +inline static U32x2 operator*(U32 s, const U32x2 &a) { return a * s; } +inline static U32x2 &operator*=(U32x2 &a, U32 s) { a = a * s; return a; } +inline static bool operator==(const U32x2 &a, const U32x2 &b) { return a.x == b.x && a.y == b.y; } + +inline static U32 u32x2_dot(const U32x2 &a, const U32x2 &b) { return a.x * b.x + a.y * b.y; } +inline static U32 u32x2_length_squared(const U32x2 &a) { return u32x2_dot(a, a); } +inline static U32x2 u32x2_min(const U32x2 &a, const U32x2 &b) { return U32x2{u32_min(a.x, b.x), u32_min(a.y, b.y)}; } +inline static U32x2 u32x2_max(const U32x2 &a, const U32x2 &b) { return U32x2{u32_max(a.x, b.x), u32_max(a.y, b.y)}; } +inline static U32x2 u32x2_clamp(const U32x2 &v, const U32x2 &lo, const U32x2 &hi) { return u32x2_min(u32x2_max(v, lo), hi); } + +static constexpr U32x2 U32X2_ZERO = {0u, 0u}; +static constexpr U32x2 U32X2_ONE = {1u, 1u}; diff --git a/core/math/u32x3.h b/core/math/u32x3.h new file mode 100644 index 00000000..01599619 --- /dev/null +++ b/core/math/u32x3.h @@ -0,0 +1,31 @@ +#pragma once + +#include +#include + +// ============================================================================ +// U32x3 — 3D U32 vector (scalar). Commonly 3D grid / cluster indices. 
+// ============================================================================ + +struct U32x3 +{ + U32 x, y, z; +}; + +inline static U32x3 operator+(const U32x3 &a, const U32x3 &b) { return U32x3{a.x + b.x, a.y + b.y, a.z + b.z}; } +inline static U32x3 &operator+=(U32x3 &a, const U32x3 &b) { a = a + b; return a; } +inline static U32x3 operator-(const U32x3 &a, const U32x3 &b) { return U32x3{a.x - b.x, a.y - b.y, a.z - b.z}; } +inline static U32x3 &operator-=(U32x3 &a, const U32x3 &b) { a = a - b; return a; } +inline static U32x3 operator*(const U32x3 &a, U32 s) { return U32x3{a.x * s, a.y * s, a.z * s}; } +inline static U32x3 operator*(U32 s, const U32x3 &a) { return a * s; } +inline static U32x3 &operator*=(U32x3 &a, U32 s) { a = a * s; return a; } +inline static bool operator==(const U32x3 &a, const U32x3 &b) { return a.x == b.x && a.y == b.y && a.z == b.z; } + +inline static U32 u32x3_dot(const U32x3 &a, const U32x3 &b) { return a.x * b.x + a.y * b.y + a.z * b.z; } +inline static U32 u32x3_length_squared(const U32x3 &a) { return u32x3_dot(a, a); } +inline static U32x3 u32x3_min(const U32x3 &a, const U32x3 &b) { return U32x3{u32_min(a.x, b.x), u32_min(a.y, b.y), u32_min(a.z, b.z)}; } +inline static U32x3 u32x3_max(const U32x3 &a, const U32x3 &b) { return U32x3{u32_max(a.x, b.x), u32_max(a.y, b.y), u32_max(a.z, b.z)}; } +inline static U32x3 u32x3_clamp(const U32x3 &v, const U32x3 &lo, const U32x3 &hi) { return u32x3_min(u32x3_max(v, lo), hi); } + +static constexpr U32x3 U32X3_ZERO = {0u, 0u, 0u}; +static constexpr U32x3 U32X3_ONE = {1u, 1u, 1u}; diff --git a/core/math/u32x4.h b/core/math/u32x4.h new file mode 100644 index 00000000..c25fff3e --- /dev/null +++ b/core/math/u32x4.h @@ -0,0 +1,151 @@ +#pragma once + +#include +#include + +// ============================================================================ +// U32x4 — 4D U32 vector, SIMD-backed. SIMD masks, packed flags. 
+// ============================================================================ + +#if defined(SIMD_FORCE_SCALAR) + struct Simd_U32x4 { U32 v[4]; }; +#elif defined(SIMD_NEON) + #include <arm_neon.h> + typedef uint32x4_t Simd_U32x4; +#elif defined(SIMD_AVX) + #include <immintrin.h> + typedef __m128i Simd_U32x4; +#else + struct Simd_U32x4 { U32 v[4]; }; +#endif + +struct alignas(16) U32x4 +{ + union + { + struct { U32 x, y, z, w; }; + Simd_U32x4 simd; + }; +}; + +// ---- Operators ------------------------------------------------------------- + +inline static U32x4 +operator+(const U32x4 &a, const U32x4 &b) +{ + U32x4 r; +#if defined(SIMD_NEON) + r.simd = vaddq_u32(a.simd, b.simd); +#elif defined(SIMD_AVX) + r.simd = _mm_add_epi32(a.simd, b.simd); +#else + r.x = a.x + b.x; r.y = a.y + b.y; r.z = a.z + b.z; r.w = a.w + b.w; +#endif + return r; +} + +inline static U32x4 & +operator+=(U32x4 &a, const U32x4 &b) { a = a + b; return a; } + +inline static U32x4 +operator-(const U32x4 &a, const U32x4 &b) +{ + U32x4 r; +#if defined(SIMD_NEON) + r.simd = vsubq_u32(a.simd, b.simd); +#elif defined(SIMD_AVX) + r.simd = _mm_sub_epi32(a.simd, b.simd); +#else + r.x = a.x - b.x; r.y = a.y - b.y; r.z = a.z - b.z; r.w = a.w - b.w; +#endif + return r; +} + +inline static U32x4 & +operator-=(U32x4 &a, const U32x4 &b) { a = a - b; return a; } + +inline static U32x4 +operator*(const U32x4 &a, U32 s) +{ + U32x4 r; +#if defined(SIMD_NEON) + r.simd = vmulq_n_u32(a.simd, s); +#elif defined(SIMD_AVX) + r.simd = _mm_mullo_epi32(a.simd, _mm_set1_epi32((I32)s)); +#else + r.x = a.x * s; r.y = a.y * s; r.z = a.z * s; r.w = a.w * s; +#endif + return r; +} + +inline static U32x4 +operator*(U32 s, const U32x4 &a) { return a * s; } + +inline static U32x4 & +operator*=(U32x4 &a, U32 s) { a = a * s; return a; } + +inline static bool +operator==(const U32x4 &a, const U32x4 &b) { return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w; } + +// ---- Free functions -------------------------------------------------------- + +inline 
static U32x4 +u32x4_from_u32(U32 s) +{ + U32x4 r; +#if defined(SIMD_NEON) + r.simd = vdupq_n_u32(s); +#elif defined(SIMD_AVX) + r.simd = _mm_set1_epi32((I32)s); +#else + r.x = s; r.y = s; r.z = s; r.w = s; +#endif + return r; +} + +inline static U32 +u32x4_dot(const U32x4 &a, const U32x4 &b) { return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; } + +inline static U32 +u32x4_length_squared(const U32x4 &a) { return u32x4_dot(a, a); } + +inline static U32x4 +u32x4_min(const U32x4 &a, const U32x4 &b) +{ + U32x4 r; +#if defined(SIMD_NEON) + r.simd = vminq_u32(a.simd, b.simd); +#elif defined(SIMD_AVX) + r.simd = _mm_min_epu32(a.simd, b.simd); // SSE4.1+ +#else + r.x = u32_min(a.x, b.x); r.y = u32_min(a.y, b.y); + r.z = u32_min(a.z, b.z); r.w = u32_min(a.w, b.w); +#endif + return r; +} + +inline static U32x4 +u32x4_max(const U32x4 &a, const U32x4 &b) +{ + U32x4 r; +#if defined(SIMD_NEON) + r.simd = vmaxq_u32(a.simd, b.simd); +#elif defined(SIMD_AVX) + r.simd = _mm_max_epu32(a.simd, b.simd); // SSE4.1+ +#else + r.x = u32_max(a.x, b.x); r.y = u32_max(a.y, b.y); + r.z = u32_max(a.z, b.z); r.w = u32_max(a.w, b.w); +#endif + return r; +} + +inline static U32x4 +u32x4_clamp(const U32x4 &v, const U32x4 &lo, const U32x4 &hi) +{ + return u32x4_min(u32x4_max(v, lo), hi); +} + +// ---- Constants ------------------------------------------------------------- + +static constexpr U32x4 U32X4_ZERO = {0u, 0u, 0u, 0u}; +static constexpr U32x4 U32X4_ONE = {1u, 1u, 1u, 1u}; diff --git a/core/math/u64.h b/core/math/u64.h new file mode 100644 index 00000000..06c60eab --- /dev/null +++ b/core/math/u64.h @@ -0,0 +1,27 @@ +#pragma once + +#include + +// ============================================================================ +// U64 scalar helpers. U64_MAX lives in core/defines.h. No abs/sign for unsigned. +// ============================================================================ + +inline static U64 +u64_min(U64 a, U64 b) +{ + return a < b ? 
a : b; +} + +inline static U64 +u64_max(U64 a, U64 b) +{ + return a > b ? a : b; +} + +inline static U64 +u64_clamp(U64 x, U64 lo, U64 hi) +{ + if (x < lo) return lo; + if (x > hi) return hi; + return x; +} diff --git a/core/memory/arena_allocator.cpp b/core/memory/arena_allocator.cpp index ab963429..38dc2894 100644 --- a/core/memory/arena_allocator.cpp +++ b/core/memory/arena_allocator.cpp @@ -10,20 +10,20 @@ namespace memory { struct Arena_Allocator_Node { - u64 capacity; - u64 used; + U64 capacity; + U64 used; Arena_Allocator_Node *next; }; struct Arena_Allocator_Context { Allocator *allocator; - u64 used_size; - u64 peak_size; + U64 used_size; + U64 peak_size; Arena_Allocator_Node *head; }; - Arena_Allocator::Arena_Allocator(u64 initial_capacity, Allocator *allocator) + Arena_Allocator::Arena_Allocator(U64 initial_capacity, Allocator *allocator) { Arena_Allocator *self = this; self->ctx = memory::allocate_zeroed(allocator); @@ -32,7 +32,7 @@ namespace memory self->ctx->allocator = allocator; - self->ctx->head = (Arena_Allocator_Node *)memory::allocate(allocator, sizeof(Arena_Allocator_Node) + initial_capacity); + self->ctx->head = (Arena_Allocator_Node *)memory::allocate(allocator, sizeof(Arena_Allocator_Node) + initial_capacity, alignof(Arena_Allocator_Node)); if (self->ctx->head == nullptr) log_fatal("[ARENA_ALLOCATOR]: Could not allocate memory with given size {}.", sizeof(Arena_Allocator_Node) + initial_capacity); @@ -64,32 +64,46 @@ namespace memory } void * - Arena_Allocator::allocate(u64 size) + Arena_Allocator::allocate(U64 size, U64 alignment) { Arena_Allocator *self = this; - self->ctx->used_size += size; + + // Bump-pointer with alignment: round the current position up to the next + // multiple of `alignment`, then carve out `size` bytes. 
+ auto head_start = (U64)(self->ctx->head + 1); + auto cur_pos = head_start + self->ctx->head->used; + auto aligned_pos = (cur_pos + (alignment - 1)) & ~((U64)alignment - 1); + U64 padding = (U64)(aligned_pos - cur_pos); + U64 consumed = padding + size; + + self->ctx->used_size += consumed; self->ctx->peak_size = self->ctx->used_size > self->ctx->peak_size ? self->ctx->used_size : self->ctx->peak_size; - if (self->ctx->head->used + size <= self->ctx->head->capacity) + if (self->ctx->head->used + consumed <= self->ctx->head->capacity) { - auto data = (u8 *)(self->ctx->head + 1) + self->ctx->head->used; - self->ctx->head->used += size; - return data; + self->ctx->head->used += consumed; + return (void *)aligned_pos; } else { - auto capacity = size > self->ctx->head->capacity ? size : self->ctx->head->capacity; - auto node = (Arena_Allocator_Node *)memory::allocate(self->ctx->allocator, sizeof(Arena_Allocator_Node) + capacity); + // Worst-case alignment padding inside a brand-new node is (alignment - 1). + // Ensure the new node has enough room for the aligned allocation. + U64 min_capacity = size + alignment; + U64 capacity = min_capacity > self->ctx->head->capacity ? 
min_capacity : self->ctx->head->capacity; + auto node = (Arena_Allocator_Node *)memory::allocate(self->ctx->allocator, sizeof(Arena_Allocator_Node) + capacity, alignof(Arena_Allocator_Node)); if (node == nullptr) log_fatal("[ARENA_ALLOCATOR]: Could not allocate memory with given size {}.", size); + + auto new_payload_start = (U64)(node + 1); + auto new_aligned_pos = (new_payload_start + (alignment - 1)) & ~((U64)alignment - 1); + U64 new_padding = (U64)(new_aligned_pos - new_payload_start); + node->capacity = capacity; - node->used = size; + node->used = new_padding + size; node->next = self->ctx->head; self->ctx->head = node; - // log_debug("[ARENA_ALLOCATOR]: Allocated a new node with given capacity {}.", capacity); - - return node + 1; + return (void *)new_aligned_pos; } } @@ -113,7 +127,7 @@ namespace memory node = next; } - self->ctx->head = (Arena_Allocator_Node *)memory::allocate(self->ctx->allocator, sizeof(Arena_Allocator_Node) + self->ctx->peak_size); + self->ctx->head = (Arena_Allocator_Node *)memory::allocate(self->ctx->allocator, sizeof(Arena_Allocator_Node) + self->ctx->peak_size, alignof(Arena_Allocator_Node)); if (self->ctx->head == nullptr) log_fatal("[ARENA_ALLOCATOR]: Could not allocate memory with given size {}.", sizeof(Arena_Allocator_Node) + self->ctx->peak_size); self->ctx->head->capacity = self->ctx->peak_size; @@ -126,7 +140,7 @@ namespace memory } Arena_Allocator * - arena_allocator_init(u64 initial_capacity, Allocator *allocator) + arena_allocator_init(U64 initial_capacity, Allocator *allocator) { return allocate_and_call_constructor(allocator, initial_capacity, allocator); } @@ -138,9 +152,9 @@ namespace memory } void * - arena_allocator_allocate(Arena_Allocator *self, u64 size) + arena_allocator_allocate(Arena_Allocator *self, U64 size, U64 alignment) { - return self->allocate(size); + return self->allocate(size, alignment); } void @@ -155,13 +169,13 @@ namespace memory self->clear(); } - u64 + U64 
arena_allocator_get_used_size(Arena_Allocator *self) { return self->ctx->used_size; } - u64 + U64 arena_allocator_get_peak_size(Arena_Allocator *self) { return self->ctx->peak_size; diff --git a/core/memory/arena_allocator.h b/core/memory/arena_allocator.h index 249493f3..2150f9b2 100644 --- a/core/memory/arena_allocator.h +++ b/core/memory/arena_allocator.h @@ -6,18 +6,18 @@ namespace memory { - static constexpr const u64 ARENA_ALLOCATOR_INITIAL_CAPACITY = 4 * 1024 * 1024ULL; + static constexpr const U64 ARENA_ALLOCATOR_INITIAL_CAPACITY = 4 * 1024 * 1024ULL; struct Arena_Allocator : Allocator { struct Arena_Allocator_Context *ctx; - Arena_Allocator(u64 initial_capacity = ARENA_ALLOCATOR_INITIAL_CAPACITY, Allocator *allocator = heap_allocator()); + Arena_Allocator(U64 initial_capacity = ARENA_ALLOCATOR_INITIAL_CAPACITY, Allocator *allocator = heap_allocator()); ~Arena_Allocator() override; void * - allocate(u64 size) override; + allocate(U64 size, U64 alignment) override; void deallocate(void *data) override; @@ -27,13 +27,13 @@ namespace memory }; CORE_API Arena_Allocator * - arena_allocator_init(u64 initial_capacity = ARENA_ALLOCATOR_INITIAL_CAPACITY, Allocator *allocator = heap_allocator()); + arena_allocator_init(U64 initial_capacity = ARENA_ALLOCATOR_INITIAL_CAPACITY, Allocator *allocator = heap_allocator()); CORE_API void arena_allocator_deinit(Arena_Allocator *self); CORE_API void * - arena_allocator_allocate(Arena_Allocator *self, u64 size); + arena_allocator_allocate(Arena_Allocator *self, U64 size, U64 alignment); CORE_API void arena_allocator_deallocate(Arena_Allocator *self, void *data); @@ -41,9 +41,9 @@ namespace memory CORE_API void arena_allocator_clear(Arena_Allocator *self); - CORE_API u64 + CORE_API U64 arena_allocator_get_used_size(Arena_Allocator *self); - CORE_API u64 + CORE_API U64 arena_allocator_get_peak_size(Arena_Allocator *self); } \ No newline at end of file diff --git a/core/memory/heap_allocator.cpp b/core/memory/heap_allocator.cpp 
index f31781d4..566792c4 100644 --- a/core/memory/heap_allocator.cpp +++ b/core/memory/heap_allocator.cpp @@ -13,12 +13,17 @@ namespace memory { #if DEBUG - inline static constexpr u32 CALLSTACK_MAX_FRAME_COUNT = 20; + inline static constexpr U32 CALLSTACK_MAX_FRAME_COUNT = 20; + + // In DEBUG we track each live allocation in a linked list. The node lives in its + // own ::malloc (independent from the user's aligned payload) and is reachable O(1) + // from the user pointer via a back-pointer slot placed just before the aligned data. struct Heap_Allocator_Node { - u64 size; + U64 size; + void *data; void *callstack[CALLSTACK_MAX_FRAME_COUNT]; - u32 callstack_frame_count; + U32 callstack_frame_count; Heap_Allocator_Node *next; Heap_Allocator_Node *prev; }; @@ -30,6 +35,43 @@ namespace memory }; #endif + // Allocate aligned memory via plain malloc, reserving extra slot(s) before the returned + // pointer for bookkeeping. Layout: + // + // [ ... wasted padding ... ][ slot[-N] ... slot[-1] ][ user data aligned ] + // ^ header_slots * sizeof(void*) before user data + // + // slot[-1] always stores the raw ::malloc base so deallocate() can free it. + // slot[-2] (when reserved) stores the debug Heap_Allocator_Node pointer. + // + // This works with any alignment >= alignof(void*) and avoids the _aligned_malloc vs + // posix_memalign vs free/_aligned_free platform fork — single ::malloc/::free path. 
+ static void * + _aligned_malloc_with_slots(U64 size, U64 alignment, U64 extra_slots_before) + { + if (size == 0) + return nullptr; + + U64 min_align = alignment; + if (min_align < alignof(void *)) + min_align = alignof(void *); + + U64 header_bytes = sizeof(void *) * (extra_slots_before + 1); + U64 total = size + header_bytes + (min_align - 1); + + void *raw = ::malloc(total); + if (raw == nullptr) + return nullptr; + + U64 raw_addr = (U64)raw; + U64 user_addr = (raw_addr + header_bytes + (min_align - 1)) & ~((U64)min_align - 1); + + void **slots = (void **)user_addr; + slots[-1] = raw; + + return (void *)user_addr; + } + Heap_Allocator::Heap_Allocator() { #if DEBUG @@ -46,10 +88,13 @@ namespace memory #if DEBUG Heap_Allocator *self = this; if (self->ctx->head == nullptr) + { + ::delete self->ctx; return; + } - u64 total_leak_count = 0; - u64 total_leak_size = 0; + U64 total_leak_count = 0; + U64 total_leak_size = 0; // TODO: Use logger. ::printf("memory leak detected:\n"); @@ -79,41 +124,45 @@ namespace memory } void * - Heap_Allocator::allocate(u64 size) + Heap_Allocator::allocate(U64 size, U64 alignment) { + if (size == 0) + return nullptr; + #if DEBUG + // Reserve 2 slots before user data: slot[-1]=raw base, slot[-2]=debug node pointer. 
+ void *data = _aligned_malloc_with_slots(size, alignment, /* extra_slots_before */ 1); + if (data == nullptr) + log_fatal("[HEAP_ALLOCATOR]: Could not allocate memory with size {} alignment {}.", size, alignment); + Heap_Allocator *self = this; - // TODO: - Heap_Allocator_Node *node = (Heap_Allocator_Node *)::malloc(sizeof(Heap_Allocator_Node) + size); + Heap_Allocator_Node *node = (Heap_Allocator_Node *)::malloc(sizeof(Heap_Allocator_Node)); if (node == nullptr) - log_fatal("[HEAP_ALLOCATOR]: Could not allocate memory with given size {}.", size); - - if (node != nullptr && size != 0) - { - node->size = size; - node->next = nullptr; - - self->ctx->mutex.lock(); - { - node->prev = self->ctx->head; - if (self->ctx->head != nullptr) - self->ctx->head->next = node; - self->ctx->head = node; - } - self->ctx->mutex.unlock(); + log_fatal("[HEAP_ALLOCATOR]: Could not allocate debug tracking node."); - node->callstack_frame_count = platform_callstack_capture(node->callstack, CALLSTACK_MAX_FRAME_COUNT); + node->size = size; + node->data = data; + node->next = nullptr; + node->callstack_frame_count = platform_callstack_capture(node->callstack, CALLSTACK_MAX_FRAME_COUNT); - return (node + 1); + self->ctx->mutex.lock(); + { + node->prev = self->ctx->head; + if (self->ctx->head != nullptr) + self->ctx->head->next = node; + self->ctx->head = node; } + self->ctx->mutex.unlock(); + + ((void **)data)[-2] = node; - return nullptr; + return data; #else - void *data = ::malloc(size); - // TODO: + // Release: only the raw-base back-pointer slot, no debug node. 
+ void *data = _aligned_malloc_with_slots(size, alignment, /* extra_slots_before */ 0); if (data == nullptr) - log_fatal("[HEAP_ALLOCATOR]: Could not allocate memory with given size {}.", size); + log_fatal("[HEAP_ALLOCATOR]: Could not allocate memory with size {} alignment {}.", size, alignment); return data; #endif } @@ -121,12 +170,17 @@ namespace memory void Heap_Allocator::deallocate(void *data) { + if (data == nullptr) + return; + + void **slots = (void **)data; + void *raw = slots[-1]; + #if DEBUG Heap_Allocator *self = this; - if (data != nullptr) + Heap_Allocator_Node *node = (Heap_Allocator_Node *)slots[-2]; + if (node != nullptr) { - Heap_Allocator_Node *node = ((Heap_Allocator_Node *)data) - 1; - self->ctx->mutex.lock(); { if (node == self->ctx->head) @@ -142,9 +196,9 @@ namespace memory ::free(node); } -#else - ::free(data); #endif + + ::free(raw); } Heap_Allocator * @@ -160,9 +214,9 @@ namespace memory } void * - heap_allocator_allocate(Heap_Allocator *self, u64 size) + heap_allocator_allocate(Heap_Allocator *self, U64 size, U64 alignment) { - return self->allocate(size); + return self->allocate(size, alignment); } void @@ -170,4 +224,4 @@ namespace memory { self->deallocate(data); } -} \ No newline at end of file +} diff --git a/core/memory/heap_allocator.h b/core/memory/heap_allocator.h index 50d9826e..d961b6ab 100644 --- a/core/memory/heap_allocator.h +++ b/core/memory/heap_allocator.h @@ -16,7 +16,7 @@ namespace memory ~Heap_Allocator() override; void * - allocate(u64 size) override; + allocate(U64 size, U64 alignment) override; void deallocate(void *data) override; @@ -29,7 +29,7 @@ namespace memory heap_allocator_deinit(Heap_Allocator *self); CORE_API void * - heap_allocator_allocate(Heap_Allocator *self, u64 size); + heap_allocator_allocate(Heap_Allocator *self, U64 size, U64 alignment); CORE_API void heap_allocator_deallocate(Heap_Allocator *self, void *data); diff --git a/core/memory/memory.h b/core/memory/memory.h index 16d99294..5bfcaa2e 
100644 --- a/core/memory/memory.h +++ b/core/memory/memory.h @@ -9,7 +9,6 @@ /* TODO: - - [ ] Memory alignment. - [ ] Rename this file to allocator.h? - [ ] Add meta allocator to arena and pool allocators? */ @@ -22,7 +21,7 @@ namespace memory ~Allocator() = default; virtual void * - allocate(u64 size) = 0; + allocate(U64 size, U64 alignment) = 0; virtual void deallocate(void *data) = 0; @@ -37,33 +36,36 @@ namespace memory CORE_API Allocator * temp_allocator(); + // Untyped allocation — alignment required (no default). inline static void * - allocate(u64 size) + allocate(U64 size, U64 alignment) { auto allocator = heap_allocator(); - return allocator->allocate(size); + return allocator->allocate(size, alignment); } inline static void * - allocate(Allocator *allocator, u64 size) + allocate(Allocator *allocator, U64 size, U64 alignment) { - return allocator->allocate(size); + return allocator->allocate(size, alignment); } + // Typed allocation — alignment auto from alignof(T), count optional. template inline static T * - allocate() + allocate(U64 count = 1) { - return (T *)allocate(sizeof(T)); + return (T *)allocate(sizeof(T) * count, alignof(T)); } template inline static T * - allocate(Allocator *allocator) + allocate(Allocator *allocator, U64 count = 1) { - return (T *)allocate(allocator, sizeof(T)); + return (T *)allocate(allocator, sizeof(T) * count, alignof(T)); } + // Constructor-calling typed allocation. template inline static T * allocate_and_call_constructor(TArgs &&...args) @@ -82,36 +84,40 @@ namespace memory return data; } + // Zeroed allocation — mirrors the non-zeroed family. 
inline static void * - allocate_zeroed(u64 size) + allocate_zeroed(U64 size, U64 alignment) { - void *data = allocate(size); - ::memset(data, 0, size); + void *data = allocate(size, alignment); + if (data != nullptr) + ::memset(data, 0, size); return data; } inline static void * - allocate_zeroed(Allocator *allocator, u64 size) + allocate_zeroed(Allocator *allocator, U64 size, U64 alignment) { - void *data = allocate(allocator, size); - ::memset(data, 0, size); + void *data = allocate(allocator, size, alignment); + if (data != nullptr) + ::memset(data, 0, size); return data; } template inline static T * - allocate_zeroed() + allocate_zeroed(U64 count = 1) { - return (T *)allocate_zeroed(sizeof(T)); + return (T *)allocate_zeroed(sizeof(T) * count, alignof(T)); } template inline static T * - allocate_zeroed(Allocator *allocator) + allocate_zeroed(Allocator *allocator, U64 count = 1) { - return (T *)allocate_zeroed(allocator, sizeof(T)); + return (T *)allocate_zeroed(allocator, sizeof(T) * count, alignof(T)); } + // Deallocation — unchanged signature (allocator tracks alignment internally). 
inline static void deallocate(void *data) { @@ -140,4 +146,4 @@ namespace memory data->~T(); deallocate(allocator, data); } -} \ No newline at end of file +} diff --git a/core/memory/pool_allocator.cpp b/core/memory/pool_allocator.cpp index 5db74646..f69ad39c 100644 --- a/core/memory/pool_allocator.cpp +++ b/core/memory/pool_allocator.cpp @@ -14,10 +14,10 @@ namespace memory { Arena_Allocator *arena; Pool_Allocator_Node *head; - u64 chunk_size; + U64 chunk_size; }; - Pool_Allocator::Pool_Allocator(u64 chunk_size, u64 chunk_count) + Pool_Allocator::Pool_Allocator(U64 chunk_size, U64 chunk_count) { Pool_Allocator *self = this; self->ctx = memory::allocate_zeroed(); @@ -39,12 +39,20 @@ namespace memory } void * - Pool_Allocator::allocate(u64) + Pool_Allocator::allocate(U64, U64 alignment) { Pool_Allocator *self = this; + if (alignment > alignof(void *)) + { + // Pool free-list threads a next-pointer at the start of each free chunk, + // so chunks are inherently sizeof(void*)-aligned. Over-aligned requests are + // not supported — allocate them through the arena directly if needed. 
+ log_fatal("[POOL_ALLOCATOR]: Requested alignment {} exceeds pool's guaranteed {} byte alignment.", alignment, alignof(void *)); + } + if(self->ctx->head == nullptr) { - void *result = arena_allocator_allocate(self->ctx->arena, self->ctx->chunk_size); + void *result = arena_allocator_allocate(self->ctx->arena, self->ctx->chunk_size, alignof(void *)); ::memset(result, 0, self->ctx->chunk_size); return result; } @@ -84,7 +92,7 @@ namespace memory } Pool_Allocator * - pool_allocator_init(u64 chunk_size, u64 chunk_count) + pool_allocator_init(U64 chunk_size, U64 chunk_count) { return allocate_and_call_constructor(chunk_size, chunk_count); } diff --git a/core/memory/pool_allocator.h b/core/memory/pool_allocator.h index 3dbc1df7..321a5f80 100644 --- a/core/memory/pool_allocator.h +++ b/core/memory/pool_allocator.h @@ -9,19 +9,19 @@ namespace memory { struct Pool_Allocator_Context *ctx; - Pool_Allocator(u64 chunk_size, u64 chunk_count); + Pool_Allocator(U64 chunk_size, U64 chunk_count); ~Pool_Allocator() override; void * - allocate(u64 size = 0) override; + allocate(U64 size = 0, U64 alignment = alignof(void *)) override; void deallocate(void *data) override; }; CORE_API Pool_Allocator * - pool_allocator_init(u64 chunk_size, u64 chunk_count); + pool_allocator_init(U64 chunk_size, U64 chunk_count); CORE_API void pool_allocator_deinit(Pool_Allocator *self); diff --git a/core/platform/platform.h b/core/platform/platform.h index bf7d28ef..6c3c415f 100644 --- a/core/platform/platform.h +++ b/core/platform/platform.h @@ -14,6 +14,55 @@ platform_file_read(const char *file_path, memory::Allocator *allocator = memory: return platform_file_read(string_literal(file_path), allocator); } +// ============================================================ +// Handle-based file I/O (used by File_Stream) +// ============================================================ + +typedef void *Platform_File_Handle; +#define PLATFORM_FILE_HANDLE_INVALID nullptr + +enum Platform_File_Mode +{ + 
PLATFORM_FILE_MODE_READ, + PLATFORM_FILE_MODE_WRITE, + PLATFORM_FILE_MODE_READ_WRITE, + PLATFORM_FILE_MODE_APPEND, +}; + +enum Platform_File_Seek_Origin +{ + PLATFORM_FILE_SEEK_ORIGIN_BEGIN, + PLATFORM_FILE_SEEK_ORIGIN_CURRENT, + PLATFORM_FILE_SEEK_ORIGIN_END, +}; + +CORE_API Platform_File_Handle +platform_file_open(const String &path, Platform_File_Mode mode); + +inline static Platform_File_Handle +platform_file_open(const char *path, Platform_File_Mode mode) +{ + return platform_file_open(string_literal(path), mode); +} + +CORE_API void +platform_file_close(Platform_File_Handle handle); + +CORE_API U64 +platform_file_read(Platform_File_Handle handle, void *data, U64 size); + +CORE_API U64 +platform_file_write(Platform_File_Handle handle, const void *data, U64 size); + +CORE_API bool +platform_file_seek(Platform_File_Handle handle, I64 offset, Platform_File_Seek_Origin origin); + +CORE_API U64 +platform_file_tell(Platform_File_Handle handle); + +CORE_API U64 +platform_file_size(Platform_File_Handle handle); + CORE_API bool platform_path_is_valid(const String &path); @@ -93,28 +142,28 @@ platform_path_read_file(const char *path, memory::Allocator *allocator = memory: return platform_path_read_file(string_literal(path), allocator); } -CORE_API u64 +CORE_API U64 platform_path_write_file(const String &path, Block block); -inline static u64 +inline static U64 platform_path_write_file(const String &path, const String &content) { return platform_path_write_file(path, Block{(void *)content.data, content.count}); } -inline static u64 +inline static U64 platform_path_write_file(const String &path, const char *content) { return platform_path_write_file(path, string_literal(content)); } -inline static u64 +inline static U64 platform_path_write_file(const char *path, const String &content) { return platform_path_write_file(string_literal(path), Block{(void *)content.data, content.count}); } -inline static u64 +inline static U64 platform_path_write_file(const char *path, const 
char *content) { return platform_path_write_file(string_literal(path), string_literal(content)); @@ -163,20 +212,20 @@ typedef struct Platform_Api char filepath[4096]; void *handle; void *api; - i64 last_write_time; + I64 last_write_time; } Platform_Api; typedef struct Platform_Memory { - u8 *ptr; - u64 size; + U8 *ptr; + U64 size; } Platform_Memory; typedef struct Platform_Allocator { - u8 *ptr; - u64 size; - u64 used; + U8 *ptr; + U64 size; + U64 used; } Platform_Allocator; typedef struct Platform_Thread Platform_Thread; @@ -288,55 +337,55 @@ typedef struct Platform_Key_State bool released; // Once we release. bool down; - i32 press_count; - i32 release_count; + I32 press_count; + I32 release_count; } Platform_Key_State; typedef struct Platform_Input { - i32 mouse_x, mouse_y; - i32 mouse_dx, mouse_dy; - f32 mouse_wheel; + I32 mouse_x, mouse_y; + I32 mouse_dx, mouse_dy; + F32 mouse_wheel; Platform_Key_State keys[PLATFORM_KEY_COUNT]; } Platform_Input; typedef struct Platform_Window { void *handle; // TODO: Rename to context. - u32 width, height; + U32 width, height; Platform_Input input; } Platform_Window; typedef struct Glyph { - i32 codepoint; - i32 yadvance; - u32 width; - u32 height; - f32 uv_min_x; - f32 uv_min_y; - f32 uv_max_x; - f32 uv_max_y; + I32 codepoint; + I32 yadvance; + U32 width; + U32 height; + F32 uv_min_x; + F32 uv_min_y; + F32 uv_max_x; + F32 uv_max_y; } Glyph; typedef struct Platform_Font { // Font data. - i32 ascent; - i32 descent; - i32 line_spacing; - u32 whitespace_width; - u32 max_glyph_height; - i32 *kerning_table; + I32 ascent; + I32 descent; + I32 line_spacing; + U32 whitespace_width; + U32 max_glyph_height; + I32 *kerning_table; // Font glyphs. Glyph *glyphs; - u32 glyph_count; + U32 glyph_count; // Font atlas. 
- u8 *atlas; - u32 atlas_width; - u32 atlas_height; + U8 *atlas; + U32 atlas_width; + U32 atlas_height; } Platform_Font; @@ -351,13 +400,13 @@ platform_api_load(Platform_Api *self); CORE_API Platform_Allocator -platform_allocator_init(u64 size_in_bytes); +platform_allocator_init(U64 size_in_bytes); CORE_API void platform_allocator_deinit(Platform_Allocator *self); CORE_API Platform_Memory -platform_allocator_alloc(Platform_Allocator *self, u64 size_in_bytes); +platform_allocator_alloc(Platform_Allocator *self, U64 size_in_bytes); CORE_API void platform_allocator_clear(Platform_Allocator *self); @@ -374,7 +423,7 @@ platform_thread_run(Platform_Thread *self, void (*function)(void *), void *user_ CORE_API Platform_Window -platform_window_init(u32 width, u32 height, const char *title); +platform_window_init(U32 width, U32 height, const char *title); CORE_API void platform_window_deinit(Platform_Window *self); @@ -405,13 +454,13 @@ platform_set_current_directory(); CORE_API bool platform_file_exists(const char *filepath); -CORE_API u64 +CORE_API U64 platform_file_size(const char *filepath); -CORE_API u64 +CORE_API U64 platform_file_read(const char *filepath, Platform_Memory mem); -CORE_API u64 +CORE_API U64 platform_file_write(const char *filepath, Platform_Memory mem); CORE_API bool @@ -429,7 +478,7 @@ platform_file_delete(const char *filepath); * Note that in case the path was larger than the supplied buffer, the dialog will return 'false'. */ CORE_API bool -platform_file_dialog_open(char *path, u32 path_length, const char *filters); +platform_file_dialog_open(char *path, U32 path_length, const char *filters); /** * @brief Opens a file dialog for saving. @@ -440,23 +489,23 @@ platform_file_dialog_open(char *path, u32 path_length, const char *filters); * Note that in case the path was larger than the supplied buffer, the dialog will return 'false'. 
*/ CORE_API bool -platform_file_dialog_save(char *path, u32 path_length, const char *filters); +platform_file_dialog_save(char *path, U32 path_length, const char *filters); -CORE_API u64 +CORE_API U64 platform_query_microseconds(void); CORE_API void -platform_sleep_set_period(u32 period); +platform_sleep_set_period(U32 period); CORE_API void -platform_sleep(u32 milliseconds); +platform_sleep(U32 milliseconds); -CORE_API u32 -platform_callstack_capture(void **callstack, u32 frame_count); +CORE_API U32 +platform_callstack_capture(void **callstack, U32 frame_count); CORE_API void -platform_callstack_log(void **callstack, u32 frame_count); +platform_callstack_log(void **callstack, U32 frame_count); /** * @brief Loads the font at the specified path, and extracts information about glyphs from it. @@ -468,7 +517,7 @@ platform_callstack_log(void **callstack, u32 frame_count); * The font atlas stores only the alpha channel of the font glyphs. */ CORE_API Platform_Font -platform_font_init(const char *filepath, const char *face_name, u32 font_height, bool origin_top_left); +platform_font_init(const char *filepath, const char *face_name, U32 font_height, bool origin_top_left); /** * @brief Frees resources held by a previously loaded 'Font' structure. 
diff --git a/core/platform/platform_linux.cpp b/core/platform/platform_linux.cpp index 7f1c24b2..58647449 100644 --- a/core/platform/platform_linux.cpp +++ b/core/platform/platform_linux.cpp @@ -882,6 +882,69 @@ platform_file_write(const char *filepath, Platform_Memory mem) return bytes_written; } +Platform_File_Handle +platform_file_open(const String &path, Platform_File_Mode mode) +{ + int flags = 0; + switch (mode) + { + case PLATFORM_FILE_MODE_READ: flags = O_RDONLY; break; + case PLATFORM_FILE_MODE_WRITE: flags = O_WRONLY | O_CREAT | O_TRUNC; break; + case PLATFORM_FILE_MODE_READ_WRITE: flags = O_RDWR | O_CREAT; break; + case PLATFORM_FILE_MODE_APPEND: flags = O_WRONLY | O_CREAT | O_APPEND; break; + } + int fd = ::open(path.data, flags, S_IRWXU); + return fd == -1 ? PLATFORM_FILE_HANDLE_INVALID : (Platform_File_Handle)(iptr)fd; +} + +void +platform_file_close(Platform_File_Handle handle) +{ + if (handle) + ::close((int)(iptr)handle); +} + +u64 +platform_file_read(Platform_File_Handle handle, void *data, u64 size) +{ + ssize_t bytes_read = ::read((int)(iptr)handle, data, size); + return bytes_read < 0 ? 0 : (u64)bytes_read; +} + +u64 +platform_file_write(Platform_File_Handle handle, const void *data, u64 size) +{ + ssize_t bytes_written = ::write((int)(iptr)handle, data, size); + return bytes_written < 0 ? 
0 : (u64)bytes_written; +} + +bool +platform_file_seek(Platform_File_Handle handle, i64 offset, Platform_File_Seek_Origin origin) +{ + int whence = SEEK_SET; + switch (origin) + { + case PLATFORM_FILE_SEEK_ORIGIN_BEGIN: whence = SEEK_SET; break; + case PLATFORM_FILE_SEEK_ORIGIN_CURRENT: whence = SEEK_CUR; break; + case PLATFORM_FILE_SEEK_ORIGIN_END: whence = SEEK_END; break; + } + return ::lseek((int)(iptr)handle, (off_t)offset, whence) != (off_t)-1; +} + +u64 +platform_file_tell(Platform_File_Handle handle) +{ + return (u64)::lseek((int)(iptr)handle, 0, SEEK_CUR); +} + +u64 +platform_file_size(Platform_File_Handle handle) +{ + struct stat st = {}; + ::fstat((int)(iptr)handle, &st); + return (u64)st.st_size; +} + bool platform_file_copy(const char *from, const char *to) { diff --git a/core/platform/platform_macos.mm b/core/platform/platform_macos.mm index b6aba00f..e5b1c2fc 100644 --- a/core/platform/platform_macos.mm +++ b/core/platform/platform_macos.mm @@ -49,7 +49,7 @@ } inline static PLATFORM_KEY -_platform_key_from_button_number(i32 button_number) +_platform_key_from_button_number(I32 button_number) { switch (button_number) { @@ -61,7 +61,7 @@ } inline static PLATFORM_KEY -_platform_key_from_key_code(i32 key_code) +_platform_key_from_key_code(I32 key_code) { switch (key_code) { @@ -388,8 +388,8 @@ @implementation Window_Delegate if (platform_path_is_directory(path)) return path_directory; - u64 path_directory_length = string_find_last_of(path_directory, '/'); - if (path_directory_length != u64(-1)) + U64 path_directory_length = string_find_last_of(path_directory, '/'); + if (path_directory_length != U64(-1)) string_resize(path_directory, path_directory_length); return path_directory; } @@ -412,14 +412,14 @@ @implementation Window_Delegate platform_path_get_executable_path(memory::Allocator *allocator) { char module_path_relative[PATH_MAX + 1]; - u32 module_path_relative_size = sizeof(module_path_relative); + U32 module_path_relative_size = 
sizeof(module_path_relative); ::memset(module_path_relative, 0, module_path_relative_size); char module_path_absolute[PATH_MAX + 1]; ::memset(module_path_absolute, 0, sizeof(module_path_absolute)); - i64 module_path_relative_length = ::_NSGetExecutablePath(module_path_relative, &module_path_relative_size); - validate(module_path_relative_length != -1 && module_path_relative_length < (i64)sizeof(module_path_relative), "[PLATFORM]: Failed to get relative path of the current executable."); + I64 module_path_relative_length = ::_NSGetExecutablePath(module_path_relative, &module_path_relative_size); + validate(module_path_relative_length != -1 && module_path_relative_length < (I64)sizeof(module_path_relative), "[PLATFORM]: Failed to get relative path of the current executable."); char *path_absolute = ::realpath(module_path_relative, module_path_absolute); validate(path_absolute == module_path_absolute, "[PLATFORM]: Failed to get absolute path of the current executable."); @@ -433,7 +433,7 @@ @implementation Window_Delegate String path_temp = string_copy(path, memory::temp_allocator()); string_replace(path_temp, "\\", "/"); Array splits = string_split(path_temp, "/", true, memory::temp_allocator()); - return string_copy(array_last(splits), allocator); + return string_copy(array_back(splits), allocator); } String @@ -441,17 +441,17 @@ @implementation Window_Delegate { String content = string_init(allocator); - i32 file_handle = ::open(path.data, O_RDONLY, S_IRWXU); + I32 file_handle = ::open(path.data, O_RDONLY, S_IRWXU); if (file_handle == -1) return content; - u64 file_size = platform_file_size(path.data); + U64 file_size = platform_file_size(path.data); if (file_size == 0) return content; string_resize(content, file_size); - i64 bytes_read = ::read(file_handle, content.data, content.count); + I64 bytes_read = ::read(file_handle, content.data, content.count); validate(::close(file_handle) == 0, "[PLATFORM]: Failed to close file handle."); if (bytes_read == -1) return 
content; @@ -459,14 +459,14 @@ @implementation Window_Delegate return content; } -u64 +U64 platform_path_write_file(const String &path, Block block) { - i32 file_handle = ::open(path.data, O_WRONLY | O_CREAT | O_TRUNC, S_IRWXU); + I32 file_handle = ::open(path.data, O_WRONLY | O_CREAT | O_TRUNC, S_IRWXU); if (file_handle == -1) return 0; - i64 bytes_written = ::write(file_handle, block.data, block.size); + I64 bytes_written = ::write(file_handle, block.data, block.size); validate(::close(file_handle) == 0, "[PLATFORM]: Failed to close file handle."); if (bytes_written == -1) return 0; @@ -498,12 +498,12 @@ @implementation Window_Delegate String file_name = string_from(entry->d_name, memory::temp_allocator()); if (extension_filter.count > 0) { - u64 extension_position = string_find_last_of(file_name, '.'); - if (extension_position == u64(-1)) + U64 extension_position = string_find_last_of(file_name, '.'); + if (extension_position == U64(-1)) continue; String file_extension = string_with_capacity(file_name.count - extension_position - 1, memory::temp_allocator()); - for (u64 i = extension_position + 1; i < file_name.count; ++i) + for (U64 i = extension_position + 1; i < file_name.count; ++i) string_append(file_extension, file_name.data[i]); if (file_extension != extension_filter) @@ -547,7 +547,7 @@ @implementation Window_Delegate validate(self.api, "[PLATFORM]: Failed to get api."); struct stat file_stat = {}; - [[maybe_unused]] i32 stat_result = ::stat(src_relative_path, &file_stat); + [[maybe_unused]] I32 stat_result = ::stat(src_relative_path, &file_stat); validate(stat_result == 0, "[PLATFORM]: Failed to get file attributes."); self.last_write_time = file_stat.st_mtime; @@ -576,10 +576,10 @@ @implementation Window_Delegate _string_concat(self->filepath, ".tmp", dst_absolute_path); struct stat file_stat = {}; - i32 stat_result = ::stat(self->filepath, &file_stat); + I32 stat_result = ::stat(self->filepath, &file_stat); validate(stat_result == 0, "[PLATFORM]: 
Failed to get file attributes."); - i64 last_write_time = file_stat.st_mtime; + I64 last_write_time = file_stat.st_mtime; if ((last_write_time == self->last_write_time) || (stat_result != 0)) return self->api; @@ -609,10 +609,10 @@ @implementation Window_Delegate Platform_Allocator -platform_allocator_init(u64 size_in_bytes) +platform_allocator_init(U64 size_in_bytes) { Platform_Allocator self = {}; - self.ptr = (u8 *)::mmap(0, size_in_bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + self.ptr = (U8 *)::mmap(0, size_in_bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (self.ptr) self.size = size_in_bytes; return self; @@ -621,12 +621,12 @@ @implementation Window_Delegate void platform_allocator_deinit(Platform_Allocator *self) { - [[maybe_unused]] i32 result = ::munmap(self->ptr, self->size); + [[maybe_unused]] I32 result = ::munmap(self->ptr, self->size); validate(result == 0, "[PLATFORM][MACOS]: Failed to free virtual memory."); } Platform_Memory -platform_allocator_alloc(Platform_Allocator *self, u64 size_in_bytes) +platform_allocator_alloc(Platform_Allocator *self, U64 size_in_bytes) { Platform_Memory res = {}; if (self->used + size_in_bytes >= self->size) @@ -701,7 +701,7 @@ @implementation Window_Delegate // TODO: Return early with error message if failed to create objects. 
Platform_Window -platform_window_init(u32 width, u32 height, const char *title) +platform_window_init(U32 width, U32 height, const char *title) { Platform_Window_Context *ctx = memory::allocate_zeroed(); @@ -769,7 +769,7 @@ @implementation Window_Delegate { Platform_Window_Context *ctx = (Platform_Window_Context *)self->handle; - for (i32 i = 0; i < PLATFORM_KEY_COUNT; ++i) + for (I32 i = 0; i < PLATFORM_KEY_COUNT; ++i) { self->input.keys[i].pressed = false; self->input.keys[i].released = false; @@ -894,14 +894,14 @@ @implementation Window_Delegate platform_set_current_directory() { char module_path_relative[PATH_MAX + 1]; - u32 module_path_relative_size = sizeof(module_path_relative); + U32 module_path_relative_size = sizeof(module_path_relative); ::memset(module_path_relative, 0, module_path_relative_size); char module_path_absolute[PATH_MAX + 1]; ::memset(module_path_absolute, 0, sizeof(module_path_absolute)); - [[maybe_unused]] i64 module_path_relative_length = ::_NSGetExecutablePath(module_path_relative, &module_path_relative_size); - validate(module_path_relative_length != -1 && module_path_relative_length < (i64)sizeof(module_path_relative), "[PLATFORM]: Failed to get relative path of the current executable."); + [[maybe_unused]] I64 module_path_relative_length = ::_NSGetExecutablePath(module_path_relative, &module_path_relative_size); + validate(module_path_relative_length != -1 && module_path_relative_length < (I64)sizeof(module_path_relative), "[PLATFORM]: Failed to get relative path of the current executable."); [[maybe_unused]] char *path_absolute = ::realpath(module_path_relative, module_path_absolute); validate(path_absolute == module_path_absolute, "[PLATFORM]: Failed to get absolute path of the current executable."); @@ -915,7 +915,7 @@ @implementation Window_Delegate } *last_slash = '\0'; - [[maybe_unused]] i32 result = ::chdir(module_path_absolute); + [[maybe_unused]] I32 result = ::chdir(module_path_absolute); validate(result == 0, "[PLATFORM]: 
Failed to set current directory."); ::strcpy(current_executable_directory, module_path_absolute); } @@ -927,7 +927,7 @@ @implementation Window_Delegate return ::stat(filepath, &file_stat) == 0; } -u64 +U64 platform_file_size(const char *filepath) { struct stat file_stat = {}; @@ -941,17 +941,17 @@ @implementation Window_Delegate { String content = string_init(allocator); - i32 file_handle = ::open(file_path.data, O_RDONLY, S_IRWXU); + I32 file_handle = ::open(file_path.data, O_RDONLY, S_IRWXU); if (file_handle == -1) return content; - u64 file_size = platform_file_size(file_path.data); + U64 file_size = platform_file_size(file_path.data); if (file_size == 0) return content; string_resize(content, file_size); - i64 bytes_read = ::read(file_handle, content.data, content.count); + I64 bytes_read = ::read(file_handle, content.data, content.count); validate(::close(file_handle) == 0, "[PLATFORM]: Failed to close file handle."); if (bytes_read == -1) return content; @@ -959,30 +959,30 @@ @implementation Window_Delegate return content; } -u64 +U64 platform_file_read(const char *filepath, Platform_Memory mem) { - i32 file_handle = ::open(filepath, O_RDONLY, S_IRWXU); + I32 file_handle = ::open(filepath, O_RDONLY, S_IRWXU); if (file_handle == -1) return 0; - i64 bytes_read = ::read(file_handle, mem.ptr, mem.size); - [[maybe_unused]] i32 close_result = ::close(file_handle); + I64 bytes_read = ::read(file_handle, mem.ptr, mem.size); + [[maybe_unused]] I32 close_result = ::close(file_handle); validate(close_result == 0, "[PLATFORM]: Failed to close file handle."); if (bytes_read == -1) return 0; return bytes_read; } -u64 +U64 platform_file_write(const char *filepath, Platform_Memory mem) { - i32 file_handle = ::open(filepath, O_WRONLY | O_CREAT | O_TRUNC, S_IRWXU); + I32 file_handle = ::open(filepath, O_WRONLY | O_CREAT | O_TRUNC, S_IRWXU); if (file_handle == -1) return 0; - i64 bytes_written = ::write(file_handle, mem.ptr, mem.size); - [[maybe_unused]] i32 close_result = 
::close(file_handle); + I64 bytes_written = ::write(file_handle, mem.ptr, mem.size); + [[maybe_unused]] I32 close_result = ::close(file_handle); validate(close_result == 0, "[PLATFORM]: Failed to close file handle."); if (bytes_written == -1) return 0; @@ -992,11 +992,11 @@ @implementation Window_Delegate bool platform_file_copy(const char *from, const char *to) { - i32 src_file = ::open(from, O_RDONLY); + I32 src_file = ::open(from, O_RDONLY); if (src_file < 0) return false; - i32 dst_file = ::open(to, O_WRONLY | O_CREAT, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); + I32 dst_file = ::open(to, O_WRONLY | O_CREAT, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); if (dst_file < 0) { ::close(src_file); @@ -1011,14 +1011,14 @@ @implementation Window_Delegate char buffer[8192]; while (true) { - i64 bytes_read = ::read(src_file, buffer, sizeof(buffer)); + I64 bytes_read = ::read(src_file, buffer, sizeof(buffer)); if (bytes_read == 0) break; if (bytes_read == -1) return false; - i64 bytes_written = ::write(dst_file, buffer, bytes_read); + I64 bytes_written = ::write(dst_file, buffer, bytes_read); if (bytes_written != bytes_read) return false; } @@ -1033,7 +1033,7 @@ @implementation Window_Delegate } bool -platform_file_dialog_open(char *path, u32 path_length, const char *filters) +platform_file_dialog_open(char *path, U32 path_length, const char *filters) { ::memset(path, 0, path_length); @@ -1062,7 +1062,7 @@ @implementation Window_Delegate } bool -platform_file_dialog_save(char *path, u32 path_length, const char *filters) +platform_file_dialog_save(char *path, U32 path_length, const char *filters) { ::memset(path, 0, path_length); @@ -1088,23 +1088,23 @@ @implementation Window_Delegate return false; } -u64 +U64 platform_query_microseconds() { struct timespec time; - [[maybe_unused]] i32 result = clock_gettime(CLOCK_MONOTONIC, &time); + [[maybe_unused]] I32 result = clock_gettime(CLOCK_MONOTONIC, &time); validate(result == 0, "[PLATFORM]: 
Failed to query clock."); return time.tv_sec * 1000000 + time.tv_nsec * 0.001; } void -platform_sleep_set_period(u32) +platform_sleep_set_period(U32) { } void -platform_sleep(u32 milliseconds) +platform_sleep(U32 milliseconds) { struct timespec ts; ts.tv_sec = milliseconds / 1000; @@ -1112,8 +1112,8 @@ @implementation Window_Delegate ::nanosleep(&ts, 0); } -u32 -platform_callstack_capture(void **callstack, u32 frame_count) +U32 +platform_callstack_capture(void **callstack, U32 frame_count) { unused(callstack, frame_count); #if DEBUG @@ -1125,7 +1125,7 @@ @implementation Window_Delegate } void -platform_callstack_log(void **callstack, u32 frame_count) +platform_callstack_log(void **callstack, U32 frame_count) { unused(callstack, frame_count); #if DEBUG @@ -1134,7 +1134,7 @@ @implementation Window_Delegate { // TODO: Use logger. ::printf("callstack:\n"); - for (u32 i = 0; i < frame_count; ++i) + for (U32 i = 0; i < frame_count; ++i) ::printf("\t[%" PRIu32 "]: %s\n", frame_count - i - 1, symbols[i]); ::free(symbols); @@ -1143,7 +1143,7 @@ @implementation Window_Delegate } Platform_Font -platform_font_init(const char *, const char *, u32, bool) +platform_font_init(const char *, const char *, U32, bool) { return {}; } diff --git a/core/platform/platform_win32.cpp b/core/platform/platform_win32.cpp index 5cd86099..5da0020e 100644 --- a/core/platform/platform_win32.cpp +++ b/core/platform/platform_win32.cpp @@ -209,8 +209,8 @@ platform_path_get_directory(const String &path, memory::Allocator *allocator) if (platform_path_is_directory(path)) return path_directory; - u64 path_directory_length = string_find_last_of(path_directory, '/'); - if (path_directory_length != u64(-1)) + U64 path_directory_length = string_find_last_of(path_directory, '/'); + if (path_directory_length != U64(-1)) string_resize(path_directory, path_directory_length); return path_directory; } @@ -239,7 +239,7 @@ String platform_path_get_executable_path(memory::Allocator *allocator) { String 
path_executable_temp = string_with_capacity(4096, memory::temp_allocator()); - u64 path_executable_length = ::GetModuleFileName(0, path_executable_temp.data, (DWORD)path_executable_temp.count); + U64 path_executable_length = ::GetModuleFileName(0, path_executable_temp.data, (DWORD)path_executable_temp.count); string_resize(path_executable_temp, path_executable_length); string_replace(path_executable_temp, '\\', '/'); return string_copy(path_executable_temp, allocator); @@ -251,7 +251,7 @@ platform_path_get_file_name(const String &path, memory::Allocator *allocator) String path_temp = string_copy(path, memory::temp_allocator()); string_replace(path_temp, "\\", "/"); Array splits = string_split(path_temp, "/", true, memory::temp_allocator()); - return string_copy(array_last(splits), allocator); + return string_copy(array_back(splits), allocator); } String @@ -263,21 +263,21 @@ platform_path_read_file(const String &path, memory::Allocator *allocator) if (file_handle == INVALID_HANDLE_VALUE) return content; - u64 file_size = platform_file_size(path.data); + U64 file_size = platform_file_size(path.data); if (file_size == 0) return content; string_resize(content, file_size); DWORD bytes_read = 0; - ::ReadFile(file_handle, content.data, (u32)content.count, &bytes_read, 0); + ::ReadFile(file_handle, content.data, (U32)content.count, &bytes_read, 0); validate(::CloseHandle(file_handle), "[PLATFORM][WINDOWS]: Failed to close file handle."); validate(content.count == bytes_read); return content; } -u64 +U64 platform_path_write_file(const String &path, Block block) { HANDLE file_handle = ::CreateFileA(path.data, GENERIC_WRITE, 0, 0, CREATE_ALWAYS, 0, 0); @@ -289,7 +289,92 @@ platform_path_write_file(const String &path, Block block) validate(::CloseHandle(file_handle)); validate(bytes_written == block.size); - return (u64)bytes_written; + return (U64)bytes_written; +} + +Platform_File_Handle +platform_file_open(const String &path, Platform_File_Mode mode) +{ + DWORD access = 0; 
+ DWORD creation = 0; + + switch (mode) + { + case PLATFORM_FILE_MODE_READ: + access = GENERIC_READ; + creation = OPEN_EXISTING; + break; + case PLATFORM_FILE_MODE_WRITE: + access = GENERIC_WRITE; + creation = CREATE_ALWAYS; + break; + case PLATFORM_FILE_MODE_READ_WRITE: + access = GENERIC_READ | GENERIC_WRITE; + creation = OPEN_ALWAYS; + break; + case PLATFORM_FILE_MODE_APPEND: + access = FILE_APPEND_DATA; + creation = OPEN_ALWAYS; + break; + } + + HANDLE handle = ::CreateFileA(path.data, access, FILE_SHARE_READ, nullptr, creation, FILE_ATTRIBUTE_NORMAL, nullptr); + return handle == INVALID_HANDLE_VALUE ? PLATFORM_FILE_HANDLE_INVALID : (Platform_File_Handle)handle; +} + +void +platform_file_close(Platform_File_Handle handle) +{ + if (handle) + ::CloseHandle((HANDLE)handle); +} + +U64 +platform_file_read(Platform_File_Handle handle, void *data, U64 size) +{ + DWORD bytes_read = 0; + ::ReadFile((HANDLE)handle, data, (DWORD)size, &bytes_read, nullptr); + return (U64)bytes_read; +} + +U64 +platform_file_write(Platform_File_Handle handle, const void *data, U64 size) +{ + DWORD bytes_written = 0; + ::WriteFile((HANDLE)handle, data, (DWORD)size, &bytes_written, nullptr); + return (U64)bytes_written; +} + +bool +platform_file_seek(Platform_File_Handle handle, I64 offset, Platform_File_Seek_Origin origin) +{ + DWORD method = FILE_BEGIN; + switch (origin) + { + case PLATFORM_FILE_SEEK_ORIGIN_BEGIN: method = FILE_BEGIN; break; + case PLATFORM_FILE_SEEK_ORIGIN_CURRENT: method = FILE_CURRENT; break; + case PLATFORM_FILE_SEEK_ORIGIN_END: method = FILE_END; break; + } + LARGE_INTEGER li; + li.QuadPart = offset; + return ::SetFilePointerEx((HANDLE)handle, li, nullptr, method) != 0; +} + +U64 +platform_file_tell(Platform_File_Handle handle) +{ + LARGE_INTEGER li = {}; + LARGE_INTEGER result = {}; + ::SetFilePointerEx((HANDLE)handle, li, &result, FILE_CURRENT); + return (U64)result.QuadPart; +} + +U64 +platform_file_size(Platform_File_Handle handle) +{ + LARGE_INTEGER size = {}; + 
::GetFileSizeEx((HANDLE)handle, &size); + return (U64)size.QuadPart; } Array @@ -316,12 +401,12 @@ platform_path_list_files(const String &directory, const String &extension_filter String file_name = string_from(find_data.cFileName, memory::temp_allocator()); if (extension_filter.count > 0) { - u64 extension_position = string_find_last_of(file_name, '.'); - if (extension_position == u64(-1)) + U64 extension_position = string_find_last_of(file_name, '.'); + if (extension_position == U64(-1)) continue; String file_extension = string_with_capacity(file_name.count - extension_position - 1, memory::temp_allocator()); - for (u64 i = extension_position + 1; i < file_name.count; ++i) + for (U64 i = extension_position + 1; i < file_name.count; ++i) string_append(file_extension, file_name.data[i]); if (file_extension != extension_filter) @@ -362,7 +447,7 @@ platform_api_init(const char *filepath) result = ::GetFileAttributesExA(path, GetFileExInfoStandard, &data); validate(result, "[PLATFORM]: Failed to get file attributes."); - self.last_write_time = *(i64 *)&data.ftLastWriteTime; + self.last_write_time = *(I64 *)&data.ftLastWriteTime; ::strcpy_s(self.filepath, filepath); return self; @@ -390,7 +475,7 @@ platform_api_load(Platform_Api *self) WIN32_FILE_ATTRIBUTE_DATA data = {}; bool result = ::GetFileAttributesExA(path, GetFileExInfoStandard, &data); - i64 last_write_time = *(i64 *)&data.ftLastWriteTime; + I64 last_write_time = *(I64 *)&data.ftLastWriteTime; if ((last_write_time == self->last_write_time) || (result == false)) return self->api; @@ -420,10 +505,10 @@ platform_api_load(Platform_Api *self) Platform_Allocator -platform_allocator_init(u64 size_in_bytes) +platform_allocator_init(U64 size_in_bytes) { Platform_Allocator self = {}; - self.ptr = (u8 *)VirtualAlloc(0, size_in_bytes, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); + self.ptr = (U8 *)VirtualAlloc(0, size_in_bytes, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); if (self.ptr) self.size = size_in_bytes; return self; @@ 
-437,7 +522,7 @@ platform_allocator_deinit(Platform_Allocator *self) } Platform_Memory -platform_allocator_alloc(Platform_Allocator *self, u64 size_in_bytes) +platform_allocator_alloc(Platform_Allocator *self, U64 size_in_bytes) { // TODO(M-Fatah): We need a way to free allocated memory from the arena we created. Platform_Memory res = {}; @@ -511,7 +596,7 @@ platform_thread_run(Platform_Thread *self, void (*function)(void *), void *user_ } Platform_Window -platform_window_init(u32 width, u32 height, const char *title) +platform_window_init(U32 width, U32 height, const char *title) { validate(width > 0 && height > 0, "[PLATFORM]: Windows cannot have zero width or height."); @@ -556,7 +641,7 @@ platform_window_deinit(Platform_Window *self) bool platform_window_poll(Platform_Window *self) { - for (i32 i = 0; i < PLATFORM_KEY_COUNT; ++i) + for (I32 i = 0; i < PLATFORM_KEY_COUNT; ++i) { self->input.keys[i].pressed = false; self->input.keys[i].released = false; @@ -590,7 +675,7 @@ platform_window_poll(Platform_Window *self) self->input.keys[key].down = true; self->input.keys[key].press_count++; if (key == PLATFORM_KEY_MOUSE_WHEEL_UP || key == PLATFORM_KEY_MOUSE_WHEEL_DOWN) - self->input.mouse_wheel += (f32)GET_WHEEL_DELTA_WPARAM(msg.wParam) / (f32)WHEEL_DELTA; + self->input.mouse_wheel += (F32)GET_WHEEL_DELTA_WPARAM(msg.wParam) / (F32)WHEEL_DELTA; } break; } @@ -651,7 +736,7 @@ platform_window_poll(Platform_Window *self) ScreenToClient((HWND)self->handle, &mouse_point); // NOTE: We want mouse coords to start bottom-left. 
- u32 mouse_point_y_inverted = (self->height - 1) - mouse_point.y; + U32 mouse_point_y_inverted = (self->height - 1) - mouse_point.y; self->input.mouse_dx = mouse_point.x - self->input.mouse_x; self->input.mouse_dy = self->input.mouse_y - mouse_point_y_inverted; self->input.mouse_x = mouse_point.x; @@ -708,7 +793,7 @@ platform_file_exists(const char *filepath) return attributes != INVALID_FILE_ATTRIBUTES; } -u64 +U64 platform_file_size(const char *filepath) { WIN32_FILE_ATTRIBUTE_DATA data = {}; @@ -721,7 +806,7 @@ platform_file_size(const char *filepath) return size.QuadPart; } -u64 +U64 platform_file_read(const char *filepath, Platform_Memory mem) { HANDLE file_handle = CreateFileA(filepath, GENERIC_READ, FILE_SHARE_READ, 0, OPEN_EXISTING, 0, 0); @@ -730,10 +815,10 @@ platform_file_read(const char *filepath, Platform_Memory mem) // TODO(M-Fatah): Handle reading files that are bigger than 4GB size. DWORD bytes_read = 0; - ReadFile(file_handle, mem.ptr, (u32)mem.size, &bytes_read, 0); + ReadFile(file_handle, mem.ptr, (U32)mem.size, &bytes_read, 0); CloseHandle(file_handle); - return (u64)bytes_read; + return (U64)bytes_read; } String @@ -745,14 +830,14 @@ platform_file_read(const String &file_path, memory::Allocator *allocator) if (file_handle == INVALID_HANDLE_VALUE) return content; - u64 file_size = platform_file_size(file_path.data); + U64 file_size = platform_file_size(file_path.data); if (file_size == 0) return content; string_resize(content, file_size); DWORD bytes_read = 0; - ::ReadFile(file_handle, content.data, (u32)content.count, &bytes_read, 0); + ::ReadFile(file_handle, content.data, (U32)content.count, &bytes_read, 0); validate(::CloseHandle(file_handle), "[PLATFORM][WINDOWS]: Failed to close file handle."); validate(content.count == bytes_read); @@ -760,7 +845,7 @@ platform_file_read(const String &file_path, memory::Allocator *allocator) return content; } -u64 +U64 platform_file_write(const char *filepath, Platform_Memory mem) { HANDLE file_handle = 
CreateFileA(filepath, GENERIC_WRITE, 0, 0, CREATE_ALWAYS, 0, 0); @@ -772,7 +857,7 @@ platform_file_write(const char *filepath, Platform_Memory mem) WriteFile(file_handle, mem.ptr, (DWORD)mem.size, &bytes_written, 0); CloseHandle(file_handle); - return (u64)bytes_written; + return (U64)bytes_written; } bool @@ -788,7 +873,7 @@ platform_file_delete(const char *filepath) } bool -platform_file_dialog_open(char *path, u32 path_length, const char *filters) +platform_file_dialog_open(char *path, U32 path_length, const char *filters) { ::memset(path, 0, path_length); @@ -810,7 +895,7 @@ platform_file_dialog_open(char *path, u32 path_length, const char *filters) } bool -platform_file_dialog_save(char *path, u32 path_length, const char *filters) +platform_file_dialog_save(char *path, U32 path_length, const char *filters) { ::memset(path, 0, path_length); @@ -833,7 +918,7 @@ platform_file_dialog_save(char *path, u32 path_length, const char *filters) } -u64 +U64 platform_query_microseconds() { LARGE_INTEGER frequency; @@ -850,7 +935,7 @@ platform_query_microseconds() } void -platform_sleep_set_period(u32 period) +platform_sleep_set_period(U32 period) { if (timeBeginPeriod(period) != TIMERR_NOERROR) { @@ -859,7 +944,7 @@ platform_sleep_set_period(u32 period) } void -platform_sleep(u32 milliseconds) +platform_sleep(U32 milliseconds) { Sleep(milliseconds); } @@ -879,8 +964,8 @@ struct Callstack } }; -u32 -platform_callstack_capture([[maybe_unused]] void **callstack, [[maybe_unused]] u32 frame_count) +U32 +platform_callstack_capture([[maybe_unused]] void **callstack, [[maybe_unused]] U32 frame_count) { #if DEBUG ::memset(callstack, 0, frame_count * sizeof(callstack)); @@ -891,7 +976,7 @@ platform_callstack_capture([[maybe_unused]] void **callstack, [[maybe_unused]] u } void -platform_callstack_log([[maybe_unused]] void **callstack, [[maybe_unused]] u32 frame_count) +platform_callstack_log([[maybe_unused]] void **callstack, [[maybe_unused]] U32 frame_count) { #if DEBUG static 
Callstack _callstack; @@ -916,7 +1001,7 @@ platform_callstack_log([[maybe_unused]] void **callstack, [[maybe_unused]] u32 f // Allocate a buffer for the symbol info. // Windows lays symbol info in memory in the form [struct][name buffer]. - constexpr u64 MAX_NAME_LENGTH = 256; + constexpr U64 MAX_NAME_LENGTH = 256; char symbol_buffer[MAX_NAME_LENGTH + sizeof(SYMBOL_INFO)]; SYMBOL_INFO *symbol_info = (SYMBOL_INFO *)symbol_buffer; @@ -926,7 +1011,7 @@ platform_callstack_log([[maybe_unused]] void **callstack, [[maybe_unused]] u32 f // TODO: Use logger. ::printf("callstack:\n"); - for (u64 i = 0; i < frame_count; ++i) + for (U64 i = 0; i < frame_count; ++i) { bool symbol_found = false; bool line_found = false; @@ -965,29 +1050,29 @@ platform_callstack_log([[maybe_unused]] void **callstack, [[maybe_unused]] u32 f Platform_Font -platform_font_init(const char *filepath, const char *face_name, u32 font_height, bool origin_top_left) +platform_font_init(const char *filepath, const char *face_name, U32 font_height, bool origin_top_left) { // Supported glyph range. - constexpr i32 GLYPH_RANGE[2] = {'!', '~'}; - constexpr u32 GLYPH_COUNT = (GLYPH_RANGE[1] + 1) - GLYPH_RANGE[0]; + constexpr I32 GLYPH_RANGE[2] = {'!', '~'}; + constexpr U32 GLYPH_COUNT = (GLYPH_RANGE[1] + 1) - GLYPH_RANGE[0]; // Font bitmap config. This is used to rasterize each glyph by winapi. - constexpr u32 BITMAP_MAX_WIDTH = 1024; - constexpr u32 BITMAP_MAX_HEIGHT = 1024; - constexpr u32 BYTES_PER_PIXEL = 1; - constexpr u32 APRON = 1; - u8 *temp_glyph_bitmaps[GLYPH_COUNT] = {}; + constexpr U32 BITMAP_MAX_WIDTH = 1024; + constexpr U32 BITMAP_MAX_HEIGHT = 1024; + constexpr U32 BYTES_PER_PIXEL = 1; + constexpr U32 APRON = 1; + U8 *temp_glyph_bitmaps[GLYPH_COUNT] = {}; // Atlas texture config. 
- constexpr u32 XPADDING = 3; - constexpr u32 YPADDING = 3; - u32 xoffset = XPADDING; - u32 total_glyph_width = 0; - u32 max_glyph_height = 0; + constexpr U32 XPADDING = 3; + constexpr U32 YPADDING = 3; + U32 xoffset = XPADDING; + U32 total_glyph_width = 0; + U32 max_glyph_height = 0; // Kerning config. - constexpr u32 KERNING_ADJUSTMENT = 3; - i32 *kerning_table = (i32 *)memory::allocate_zeroed(GLYPH_COUNT * GLYPH_COUNT * sizeof(i32)); + constexpr U32 KERNING_ADJUSTMENT = 3; + I32 *kerning_table = memory::allocate_zeroed(GLYPH_COUNT * GLYPH_COUNT); // Extract the font from Windows. AddFontResourceEx(filepath, FR_PRIVATE, 0); @@ -1019,12 +1104,12 @@ platform_font_init(const char *filepath, const char *face_name, u32 font_height, }); // Get kerning pairs. - u32 kerning_pair_count = GetKerningPairsW(device_context, 0, 0); - KERNINGPAIR *kerning_pairs = (KERNINGPAIR *)memory::allocate(memory::temp_allocator(), kerning_pair_count * sizeof(KERNINGPAIR)); + U32 kerning_pair_count = GetKerningPairsW(device_context, 0, 0); + KERNINGPAIR *kerning_pairs = memory::allocate(memory::temp_allocator(), kerning_pair_count); GetKerningPairsW(device_context, kerning_pair_count, kerning_pairs); if (kerning_pair_count > 0) { - for (u32 i = 0; i <= kerning_pair_count; ++i) + for (U32 i = 0; i <= kerning_pair_count; ++i) { KERNINGPAIR *pair = kerning_pairs + i; if ((pair->wFirst >= GLYPH_RANGE[0]) && @@ -1032,8 +1117,8 @@ platform_font_init(const char *filepath, const char *face_name, u32 font_height, (pair->wSecond >= GLYPH_RANGE[0]) && (pair->wSecond <= GLYPH_RANGE[1])) { - i32 kern_index_1 = (pair->wFirst - GLYPH_RANGE[0]); - i32 kern_index_2 = (pair->wSecond - GLYPH_RANGE[0]); + I32 kern_index_1 = (pair->wFirst - GLYPH_RANGE[0]); + I32 kern_index_2 = (pair->wSecond - GLYPH_RANGE[0]); kerning_table[kern_index_1 + kern_index_2 * GLYPH_COUNT] = pair->iKernAmount; } } @@ -1045,32 +1130,32 @@ platform_font_init(const char *filepath, const char *face_name, u32 font_height, // NOTE: We 
don't deinit this array here, since we will pass its pointer to the font structure, the user is left to free that pointer when done with font. Array glyphs = array_init(); - for (i32 c = GLYPH_RANGE[0]; c <= GLYPH_RANGE[1]; ++c) + for (I32 c = GLYPH_RANGE[0]; c <= GLYPH_RANGE[1]; ++c) { wchar_t point = (wchar_t)c; SIZE size; GetTextExtentPoint32W(device_context, &point, 1, &size); - i32 w = size.cx; + I32 w = size.cx; if (w > BITMAP_MAX_WIDTH) w = BITMAP_MAX_WIDTH; - i32 h = size.cy; + I32 h = size.cy; if (h > BITMAP_MAX_HEIGHT) h = BITMAP_MAX_HEIGHT; TextOutW(device_context, 0, 0, &point, 1); // Loop over the glyph's pixels and delete all empty pixels surrounding the font glyph. - i32 min_x = 10000; - i32 min_y = 10000; - i32 max_x = -10000; - i32 max_y = -10000; - u32 *row = (u32 *)bitmap_data + (BITMAP_MAX_HEIGHT - 1) * BITMAP_MAX_WIDTH; - for (i32 y = 0; y < h; ++y) + I32 min_x = 10000; + I32 min_y = 10000; + I32 max_x = -10000; + I32 max_y = -10000; + U32 *row = (U32 *)bitmap_data + (BITMAP_MAX_HEIGHT - 1) * BITMAP_MAX_WIDTH; + for (I32 y = 0; y < h; ++y) { - u32 *pixel = row; - for (i32 x = 0; x < w; ++x) + U32 *pixel = row; + for (I32 x = 0; x < w; ++x) { if (*pixel != 0) { @@ -1115,28 +1200,28 @@ platform_font_init(const char *filepath, const char *face_name, u32 font_height, // Adjust the kerning amount for the current glyph relative to the rest of the supported glyphs. 
ABC this_abc; GetCharABCWidthsW(device_context, glyph.codepoint, glyph.codepoint, &this_abc); - for (i32 c1 = GLYPH_RANGE[0]; c1 <= GLYPH_RANGE[1]; ++c1) + for (I32 c1 = GLYPH_RANGE[0]; c1 <= GLYPH_RANGE[1]; ++c1) { - i32 kern_index_1 = (glyph.codepoint - GLYPH_RANGE[0]); - i32 kern_index_2 = (c1 - GLYPH_RANGE[0]); + I32 kern_index_1 = (glyph.codepoint - GLYPH_RANGE[0]); + I32 kern_index_2 = (c1 - GLYPH_RANGE[0]); kerning_table[kern_index_1 + kern_index_2 * GLYPH_COUNT] += min_x - this_abc.abcA + KERNING_ADJUSTMENT; } // Allocate a temporary memory buffer to store the current glyph's bitmap. - i32 index = c - GLYPH_RANGE[0]; - temp_glyph_bitmaps[index] = (u8 *)memory::allocate_zeroed(memory::temp_allocator(), glyph.width * glyph.height * BYTES_PER_PIXEL); + I32 index = c - GLYPH_RANGE[0]; + temp_glyph_bitmaps[index] = memory::allocate_zeroed(memory::temp_allocator(), glyph.width * glyph.height * BYTES_PER_PIXEL); // Fill the glyph's bitmap. - u8 *dst_row = temp_glyph_bitmaps[index] + APRON * glyph.width * BYTES_PER_PIXEL; - u32 *src_row = (u32 *)bitmap_data + (BITMAP_MAX_HEIGHT - APRON - min_y) * BITMAP_MAX_WIDTH; - for (i32 y = min_y; y <= max_y; ++y) + U8 *dst_row = temp_glyph_bitmaps[index] + APRON * glyph.width * BYTES_PER_PIXEL; + U32 *src_row = (U32 *)bitmap_data + (BITMAP_MAX_HEIGHT - APRON - min_y) * BITMAP_MAX_WIDTH; + for (I32 y = min_y; y <= max_y; ++y) { - u32 *src = (u32 *)src_row + min_x; - u8 *dst = dst_row + APRON; - for (i32 x = min_x; x <= max_x; ++x) + U32 *src = (U32 *)src_row + min_x; + U8 *dst = dst_row + APRON; + for (I32 x = min_x; x <= max_x; ++x) { - u32 pixel = *src; - *dst++ = (u8)(pixel & 0xFF); + U32 pixel = *src; + *dst++ = (U8)(pixel & 0xFF); ++src; } dst_row += glyph.width * BYTES_PER_PIXEL; @@ -1146,18 +1231,18 @@ platform_font_init(const char *filepath, const char *face_name, u32 font_height, } // NOTE: Account for the extra XPADDING at the left and the YPADDING at the bottom and top. 
- u32 atlas_width = total_glyph_width + XPADDING; - u32 atlas_height = max_glyph_height + YPADDING * 2; + U32 atlas_width = total_glyph_width + XPADDING; + U32 atlas_height = max_glyph_height + YPADDING * 2; // Fill the atlas texture. - u8 *atlas = (u8 *)memory::allocate_zeroed(atlas_width * atlas_height * BYTES_PER_PIXEL); - for (u32 i = 0; i < glyphs.count; ++i) + U8 *atlas = memory::allocate_zeroed(atlas_width * atlas_height * BYTES_PER_PIXEL); + for (U32 i = 0; i < glyphs.count; ++i) { Glyph &glyph = glyphs[i]; - u8 *src = temp_glyph_bitmaps[i]; - for (u32 y = 0; y < glyph.height; ++y) + U8 *src = temp_glyph_bitmaps[i]; + for (U32 y = 0; y < glyph.height; ++y) { - for (u32 x = 0; x < glyph.width; ++x) + for (U32 x = 0; x < glyph.width; ++x) { if (origin_top_left) atlas[x + xoffset + (y + YPADDING) * atlas_width] = src[x + y * glyph.width]; @@ -1169,20 +1254,20 @@ platform_font_init(const char *filepath, const char *face_name, u32 font_height, if (origin_top_left) { // Min UV coordinates (x1, y1). - glyph.uv_min_x = (f32)xoffset / (f32)atlas_width; - glyph.uv_min_y = (f32)YPADDING / (f32)atlas_height; + glyph.uv_min_x = (F32)xoffset / (F32)atlas_width; + glyph.uv_min_y = (F32)YPADDING / (F32)atlas_height; // Max UV coordinates (x2, y2). - glyph.uv_max_x = (f32)(xoffset + glyph.width) / (f32)atlas_width; - glyph.uv_max_y = (f32)(YPADDING + glyph.height) / (f32)atlas_height; + glyph.uv_max_x = (F32)(xoffset + glyph.width) / (F32)atlas_width; + glyph.uv_max_y = (F32)(YPADDING + glyph.height) / (F32)atlas_height; } else { // Min UV coordinates (x1, y1). - glyph.uv_min_x = (f32)xoffset / (f32)atlas_width; - glyph.uv_min_y = (f32)(atlas_height - YPADDING + 1) / (f32)atlas_height; + glyph.uv_min_x = (F32)xoffset / (F32)atlas_width; + glyph.uv_min_y = (F32)(atlas_height - YPADDING + 1) / (F32)atlas_height; // Max UV coordinates (x2, y2). 
- glyph.uv_max_x = (f32)(xoffset + glyph.width) / (f32)atlas_width; - glyph.uv_max_y = (f32)(atlas_height - YPADDING - glyph.height + 1) / (f32)atlas_height; + glyph.uv_max_x = (F32)(xoffset + glyph.width) / (F32)atlas_width; + glyph.uv_max_y = (F32)(atlas_height - YPADDING - glyph.height + 1) / (F32)atlas_height; } xoffset += glyph.width + XPADDING; @@ -1202,7 +1287,7 @@ platform_font_init(const char *filepath, const char *face_name, u32 font_height, font.max_glyph_height = max_glyph_height; font.kerning_table = kerning_table; font.glyphs = glyphs.data; - font.glyph_count = (u32)glyphs.count; + font.glyph_count = (U32)glyphs.count; font.atlas = atlas; font.atlas_width = atlas_width; font.atlas_height = atlas_height; diff --git a/core/reflect.h b/core/reflect.h index d62dd3e9..b3dfb53e 100644 --- a/core/reflect.h +++ b/core/reflect.h @@ -46,12 +46,12 @@ - [ ] Cleanup. */ -inline static constexpr const u64 REFLECT_MAX_NAME_LENGTH = 128; -inline static constexpr const i32 REFLECT_MIN_ENUM_VALUE = -32; -inline static constexpr const i32 REFLECT_MAX_ENUM_VALUE = 64; -inline static constexpr const i32 REFLECT_MAX_ENUM_VALUE_COUNT = REFLECT_MAX_ENUM_VALUE - REFLECT_MIN_ENUM_VALUE; +inline static constexpr const U64 REFLECT_MAX_NAME_LENGTH = 128; +inline static constexpr const I32 REFLECT_MIN_ENUM_VALUE = -32; +inline static constexpr const I32 REFLECT_MAX_ENUM_VALUE = 64; +inline static constexpr const I32 REFLECT_MAX_ENUM_VALUE_COUNT = REFLECT_MAX_ENUM_VALUE - REFLECT_MIN_ENUM_VALUE; -enum TYPE_KIND +enum TYPE_KIND : int { TYPE_KIND_I8, TYPE_KIND_I16, @@ -74,14 +74,14 @@ enum TYPE_KIND struct Type_Enum_Value { - i32 index; + I32 index; const char *name; }; struct Type_Field { const char *name; - u64 offset; + U64 offset; const struct Type *type; const char *tag; }; @@ -90,8 +90,8 @@ struct Type { const char *name; TYPE_KIND kind; - u64 size; - u64 align; + U64 size; + U64 align; union { struct @@ -101,17 +101,17 @@ struct Type struct { const Type *element; - u64 
element_count; + U64 element_count; } as_array; struct { const Type_Enum_Value *values; - u64 value_count; + U64 value_count; } as_enum; struct { const Type_Field *fields; - u64 field_count; + U64 field_count; } as_struct; }; }; @@ -123,14 +123,14 @@ struct Value }; inline static constexpr void -_reflect_append_name(char *name, u64 &count, std::string_view type_name) +_reflect_append_name(char *name, U64 &count, std::string_view type_name) { - constexpr auto string_append = [](char *string, const char *to_append, u64 &count) { + constexpr auto string_append = [](char *string, const char *to_append, U64 &count) { while(*to_append != '\0' && count < REFLECT_MAX_NAME_LENGTH - 1) string[count++] = *to_append++; }; - constexpr auto append_type_name_prettified = [string_append](char *name, std::string_view type_name, u64 &count) { + constexpr auto append_type_name_prettified = [string_append](char *name, std::string_view type_name, U64 &count) { if (type_name.starts_with(' ')) type_name.remove_prefix(1); @@ -300,14 +300,14 @@ _reflect_append_name(char *name, u64 &count, std::string_view type_name) if (type_name.ends_with('>')) { - u64 open_angle_bracket_pos = type_name.find('<'); + U64 open_angle_bracket_pos = type_name.find('<'); append_type_name_prettified(name, type_name.substr(0, open_angle_bracket_pos), count); type_name.remove_prefix(open_angle_bracket_pos + 1); name[count++] = '<'; - u64 prev = 0; - u64 match = 1; - for (u64 c = 0; c < type_name.length(); ++c) + U64 prev = 0; + U64 match = 1; + for (U64 c = 0; c < type_name.length(); ++c) { if (type_name.at(c) == '<') { @@ -350,16 +350,16 @@ template inline static constexpr const char * name_of() { - if constexpr (std::is_same_v) return "i8"; - else if constexpr (std::is_same_v) return "i16"; - else if constexpr (std::is_same_v) return "i32"; - else if constexpr (std::is_same_v) return "i64"; - else if constexpr (std::is_same_v) return "u8"; - else if constexpr (std::is_same_v) return "u16"; - else if constexpr 
(std::is_same_v) return "u32"; - else if constexpr (std::is_same_v) return "u64"; - else if constexpr (std::is_same_v) return "f32"; - else if constexpr (std::is_same_v) return "f64"; + if constexpr (std::is_same_v) return "i8"; + else if constexpr (std::is_same_v) return "i16"; + else if constexpr (std::is_same_v) return "i32"; + else if constexpr (std::is_same_v) return "i64"; + else if constexpr (std::is_same_v) return "u8"; + else if constexpr (std::is_same_v) return "u16"; + else if constexpr (std::is_same_v) return "u32"; + else if constexpr (std::is_same_v) return "u64"; + else if constexpr (std::is_same_v) return "f32"; + else if constexpr (std::is_same_v) return "f64"; else if constexpr (std::is_same_v) return "bool"; else if constexpr (std::is_same_v) return "char"; else if constexpr (std::is_same_v) return "void"; @@ -367,7 +367,7 @@ name_of() { constexpr auto get_type_name = [](std::string_view type_name) -> const char * { static char name[REFLECT_MAX_NAME_LENGTH] = {}; - u64 count = 0; + U64 count = 0; _reflect_append_name(name, count, type_name); return name; }; @@ -393,25 +393,25 @@ inline static constexpr TYPE_KIND kind_of() { using Type = std::remove_cvref_t; - if constexpr (std::is_same_v) + if constexpr (std::is_same_v) return TYPE_KIND_I8; - else if constexpr (std::is_same_v) + else if constexpr (std::is_same_v) return TYPE_KIND_I16; - else if constexpr (std::is_same_v) + else if constexpr (std::is_same_v) return TYPE_KIND_I32; - else if constexpr (std::is_same_v) + else if constexpr (std::is_same_v) return TYPE_KIND_I64; - else if constexpr (std::is_same_v) + else if constexpr (std::is_same_v) return TYPE_KIND_U8; - else if constexpr (std::is_same_v) + else if constexpr (std::is_same_v) return TYPE_KIND_U16; - else if constexpr (std::is_same_v) + else if constexpr (std::is_same_v) return TYPE_KIND_U32; - else if constexpr (std::is_same_v) + else if constexpr (std::is_same_v) return TYPE_KIND_U64; - else if constexpr (std::is_same_v) + else if 
constexpr (std::is_same_v) return TYPE_KIND_F32; - else if constexpr (std::is_same_v) + else if constexpr (std::is_same_v) return TYPE_KIND_F64; else if constexpr (std::is_same_v) return TYPE_KIND_BOOL; @@ -495,7 +495,7 @@ type_of(const T) return &self; } -template +template inline static constexpr const Type * type_of(const T (&)[N]) { @@ -509,7 +509,7 @@ type_of(const T (&)[N]) return &self; } -#define _TYPE_OF_ENUM(VALUE) {(i32)VALUE, #VALUE} +#define _TYPE_OF_ENUM(VALUE) {(I32)VALUE, #VALUE} #define TYPE_OF_ENUM(T, ...) \ inline static const Type * \ @@ -528,43 +528,45 @@ type_of(const T) struct Enum_Value { - i32 index; + I32 index; std::string_view name; }; struct Enum { std::array values; - u64 count; + U64 count; }; -template -constexpr inline static Enum -get_enum(std::integer_sequence) +template +constexpr inline static Enum_Value +get_enum_value() { - // TODO: Remove -Wno-enum-constexpr-conversion. - constexpr auto get_enum_value = []() -> Enum_Value { - #if defined(_MSC_VER) // TODO: PLATFORM_WIN32. - constexpr auto type_function_name = std::string_view{__FUNCSIG__}; - constexpr auto type_name_prefix_length = type_function_name.find("()<") + 3; - constexpr auto type_name_length = type_function_name.find(">", type_name_prefix_length) - type_name_prefix_length; - #elif defined(__GNUC__) // PLATFORM_LINUX/MACOS. - constexpr auto type_function_name = std::string_view{__PRETTY_FUNCTION__}; - constexpr auto type_name_prefix_length = type_function_name.find("= ") + 2; - constexpr auto type_name_length = type_function_name.find("]", type_name_prefix_length) - type_name_prefix_length; - #else - #error "[REFLECT]: Unsupported compiler." - #endif - - char c = type_function_name.at(type_name_prefix_length); - if ((c >= '0' && c <= '9') || c == '(' || c == ')') - return {}; - return {(i32)V, {type_function_name.data() + type_name_prefix_length, type_name_length}}; - }; + #if defined(_MSC_VER) // TODO: PLATFORM_WIN32. 
+ constexpr auto type_function_name = std::string_view{__FUNCSIG__}; + constexpr auto type_name_prefix_length = type_function_name.find("()<") + 3; + constexpr auto type_name_length = type_function_name.find(">", type_name_prefix_length) - type_name_prefix_length; + #elif defined(__GNUC__) // PLATFORM_LINUX/MACOS. + constexpr auto type_function_name = std::string_view{__PRETTY_FUNCTION__}; + constexpr auto type_name_prefix_length = type_function_name.rfind("= ") + 2; + constexpr auto type_name_length = type_function_name.find("]", type_name_prefix_length) - type_name_prefix_length; + #else + #error "[REFLECT]: Unsupported compiler." + #endif + + char c = type_function_name.at(type_name_prefix_length); + if ((c >= '0' && c <= '9') || c == '(' || c == ')' || c == '-') + return {}; + return {(I32)V, {type_function_name.data() + type_name_prefix_length, type_name_length}}; +} +template +constexpr inline static Enum +get_enum(std::integer_sequence) +{ return Enum { - { get_enum_value.template operator()<(T)(I + REFLECT_MIN_ENUM_VALUE)>()...}, - ((get_enum_value.template operator()<(T)(I + REFLECT_MIN_ENUM_VALUE)>().name != "") + ...) + { get_enum_value(I + REFLECT_MIN_ENUM_VALUE)>()...}, + ((get_enum_value(I + REFLECT_MIN_ENUM_VALUE)>().name != "") + ...) 
}; } @@ -573,10 +575,10 @@ requires (std::is_enum_v) inline static constexpr const Type * type_of(const T) { - constexpr auto data = get_enum(std::make_integer_sequence()); + constexpr auto data = get_enum(std::make_integer_sequence()); - constexpr auto copy = [](char *dst, const char *src, u64 count) { - for (u64 i = 0; i < count; ++i) + constexpr auto copy = [](char *dst, const char *src, U64 count) { + for (U64 i = 0; i < count; ++i) dst[i] = src[i]; }; @@ -586,7 +588,7 @@ type_of(const T) static bool initialized = false; if (initialized == false) { - for (u64 i = 0, c = 0; i < REFLECT_MAX_ENUM_VALUE_COUNT; ++i) + for (U64 i = 0, c = 0; i < REFLECT_MAX_ENUM_VALUE_COUNT; ++i) { if (const auto &value = data.values[i]; value.name != "") { diff --git a/core/serialization/binary_serializer.h b/core/serialization/binary_serializer.h index 966e7e42..e543d7fe 100644 --- a/core/serialization/binary_serializer.h +++ b/core/serialization/binary_serializer.h @@ -9,16 +9,16 @@ struct Binary_Serializer { - Array buffer; - u64 offset; + Array buffer; + U64 offset; bool is_valid; }; struct Binary_Deserializer { memory::Allocator *allocator; - Array buffer; - u64 offset; + Array buffer; + U64 offset; bool is_valid; }; @@ -26,7 +26,7 @@ inline static Binary_Serializer binary_serializer_init(memory::Allocator *allocator = memory::heap_allocator()) { return Binary_Serializer { - .buffer = array_init(allocator), + .buffer = array_init(allocator), .offset = 0, .is_valid = false }; @@ -72,7 +72,7 @@ serialize(Binary_Serializer &self, const T &data) if (Error error = serialize(self, count_of(data))) return error; - for (u64 i = 0; i < count_of(data); ++i) + for (U64 i = 0; i < count_of(data); ++i) if (Error error = serialize(self, data[i])) return error; @@ -105,7 +105,7 @@ serialize(Binary_Serializer &self, const Array &data) if (Error error = serialize(self, data.count)) return error; - for (u64 i = 0; i < data.count; ++i) + for (U64 i = 0; i < data.count; ++i) if (Error error = 
serialize(self, data[i])) return error; @@ -151,7 +151,7 @@ serialize(Binary_Serializer &self, const char *name, const T &data) } inline static Binary_Deserializer -binary_deserializer_init(const Array &buffer, memory::Allocator *allocator = memory::heap_allocator()) +binary_deserializer_init(const Array &buffer, memory::Allocator *allocator = memory::heap_allocator()) { return Binary_Deserializer { .allocator = allocator, @@ -175,14 +175,14 @@ serialize(Binary_Deserializer &self, T &data) if (!self.is_valid) return Error{"[DESERIALIZER][BINARY]: Please use Serialize_Pair, for e.x 'serialize(deserializer, {{\"a\", a}})'."}; - u8 *d = (u8 *)&data; - u64 data_size = sizeof(data); + U8 *d = (U8 *)&data; + U64 data_size = sizeof(data); if (self.offset + data_size > self.buffer.count) return Error{"[DESERIALIZER][BINARY]: Trying to deserialize beyond buffer capacity."}; // TODO: Memcpy? - for (u64 i = 0; i < data_size; ++i) + for (U64 i = 0; i < data_size; ++i) d[i] = self.buffer[i + self.offset]; self.offset += data_size; @@ -214,14 +214,14 @@ serialize(Binary_Deserializer &self, T &data) if (!self.is_valid) return Error{"[DESERIALIZER][BINARY]: Please use Serialize_Pair, for e.x 'serialize(deserializer, {{\"a\", a}})'."}; - u64 count = 0; + U64 count = 0; if (Error error = serialize(self, count)) return error; if (count != count_of(data)) return Error{"[DESERIALIZER][BINARY]: Passed array count does not match the deserialized count."}; - for (u64 i = 0; i < count; ++i) + for (U64 i = 0; i < count; ++i) if (Error error = serialize(self, data[i])) return error; @@ -238,13 +238,13 @@ serialize(Binary_Deserializer &self, Block &block) return error; if (block.data == nullptr) - block.data = (u8 *)memory::allocate(self.allocator, block.size); + block.data = memory::allocate(self.allocator, block.size); if (block.data == nullptr) return Error{"[DESERIALIZER][BINARY]: Could not allocate memory for passed pointer type."}; - for (u64 i = 0; i < block.size; ++i) - if (Error 
error = serialize(self, ((u8 *)block.data)[i])) + for (U64 i = 0; i < block.size; ++i) + if (Error error = serialize(self, ((U8 *)block.data)[i])) return error; return Error{}; @@ -263,12 +263,12 @@ serialize(Binary_Deserializer &self, Array &data) data = array_init(self.allocator); } - u64 count = 0; + U64 count = 0; if (Error error = serialize(self, count)) return error; array_resize(data, count); - for (u64 i = 0; i < data.count; ++i) + for (U64 i = 0; i < data.count; ++i) if (Error error = serialize(self, data[i])) return error; @@ -287,12 +287,12 @@ serialize(Binary_Deserializer &self, String &data) data = string_init(self.allocator); } - u64 count = 0; + U64 count = 0; if (Error error = serialize(self, count)) return error; string_resize(data, count); - for (u64 i = 0; i < data.count; ++i) + for (U64 i = 0; i < data.count; ++i) if (Error error = serialize(self, data[i])) return error; @@ -329,12 +329,12 @@ serialize(Binary_Deserializer &self, Hash_Table &data) data = hash_table_init(self.allocator); } - u64 count = 0; + U64 count = 0; if (Error error = serialize(self, count)) return error; hash_table_clear(data); - for (u64 i = 0; i < count; ++i) + for (U64 i = 0; i < count; ++i) { K key = {}; V value = {}; @@ -362,7 +362,7 @@ serialize(Binary_Deserializer &self, const char *name, T &data) } template -inline static Result> +inline static Result> to_binary(const T &data, memory::Allocator *allocator = memory::heap_allocator()) { Binary_Serializer self = binary_serializer_init(allocator); @@ -374,7 +374,7 @@ to_binary(const T &data, memory::Allocator *allocator = memory::heap_allocator() template inline static Error -from_binary(const Array &buffer, T &data, memory::Allocator *allocator = memory::heap_allocator()) +from_binary(const Array &buffer, T &data, memory::Allocator *allocator = memory::heap_allocator()) { Binary_Deserializer self = binary_deserializer_init(buffer, allocator); DEFER(binary_deserializer_deinit(self)); diff --git 
a/core/serialization/json_serializer.h b/core/serialization/json_serializer.h index 2823b315..f86934fd 100644 --- a/core/serialization/json_serializer.h +++ b/core/serialization/json_serializer.h @@ -48,8 +48,8 @@ serialize(Json_Serializer &self, const T &data) if (!self.is_valid) return Error{"[SERIALIZER][JSON]: Please use Serialize_Pair, for e.x 'serialize(serializer, {{\"a\", a}})'."}; - JSON_Value &value = array_last(self.values); - value = json_value_init_as_number((f64)data); + JSON_Value &value = array_back(self.values); + value = json_value_init_as_number((F64)data); return Error{}; } @@ -59,7 +59,7 @@ serialize(Json_Serializer &self, const bool &data) if (!self.is_valid) return Error{"[SERIALIZER][JSON]: Please use Serialize_Pair, for e.x 'serialize(serializer, {{\"a\", a}})'."}; - JSON_Value &value = array_last(self.values); + JSON_Value &value = array_back(self.values); value = json_value_init_as_bool(data); return Error{}; } @@ -80,10 +80,10 @@ serialize(Json_Serializer &self, const T &data) if (!self.is_valid) return Error{"[SERIALIZER][JSON]: Please use Serialize_Pair, for e.x 'serialize(serializer, {{\"a\", a}})'."}; - JSON_Value &value = array_last(self.values); + JSON_Value &value = array_back(self.values); value = json_value_init_as_array(self.allocator); - for (u64 i = 0; i < count_of(data); ++i) + for (U64 i = 0; i < count_of(data); ++i) { array_push(self.values, JSON_Value{}); if (Error error = serialize(self, data[i])) @@ -100,9 +100,9 @@ serialize(Json_Serializer &self, const Block &block) if (!self.is_valid) return Error{"[SERIALIZER][JSON]: Please use Serialize_Pair, for e.x 'serialize(serializer, {{\"a\", a}})'."}; - JSON_Value &value = array_last(self.values); + JSON_Value &value = array_back(self.values); value.kind = JSON_VALUE_KIND_STRING; - value.as_string = base64_encode((const u8 *)block.data, (u32)block.size, self.allocator); + value.as_string = base64_encode((const U8 *)block.data, (U32)block.size, self.allocator); return 
Error{}; } @@ -113,10 +113,10 @@ serialize(Json_Serializer &self, const Array &data) if (!self.is_valid) return Error{"[SERIALIZER][JSON]: Please use Serialize_Pair, for e.x 'serialize(serializer, {{\"a\", a}})'."}; - JSON_Value &value = array_last(self.values); + JSON_Value &value = array_back(self.values); value = json_value_init_as_array(self.allocator); - for (u64 i = 0; i < data.count; ++i) + for (U64 i = 0; i < data.count; ++i) { array_push(self.values, JSON_Value{}); if (Error error = serialize(self, data[i])) @@ -134,7 +134,7 @@ serialize(Json_Serializer &self, const String &data) if (!self.is_valid) return Error{"[SERIALIZER][JSON]: Please use Serialize_Pair, for e.x 'serialize(serializer, {{\"a\", a}})'."}; - JSON_Value &value = array_last(self.values); + JSON_Value &value = array_back(self.values); value.kind = JSON_VALUE_KIND_STRING; value.as_string = string_copy(data, self.allocator); return Error{}; @@ -153,7 +153,7 @@ serialize(Json_Serializer &self, const Hash_Table &data) if (!self.is_valid) return Error{"[SERIALIZER][JSON]: Please use Serialize_Pair, for e.x 'serialize(serializer, {{\"a\", a}})'."}; - JSON_Value &array = array_last(self.values); + JSON_Value &array = array_back(self.values); array = json_value_init_as_array(self.allocator); for (const Hash_Table_Entry &entry : data) @@ -191,10 +191,10 @@ serialize(Json_Serializer &self, const char *name, const T &data) JSON_Value object = array_pop(self.values); - if (json_value_object_find(array_last(self.values), name)) + if (json_value_object_find(array_back(self.values), name)) log_warning("[SERIALIZER][JSON]: Overwrite of duplicate json object with name '{}'.", name); - json_value_object_insert(array_last(self.values), name, object); + json_value_object_insert(array_back(self.values), name, object); return Error{}; } @@ -248,7 +248,7 @@ serialize(Json_Deserializer &self, T &data) if (!self.is_valid) return Error{"[DESERIALIZER][JSON]: Please use Serialize_Pair, for e.x 
'serialize(deserializer, {{\"a\", a}})'."}; - data = (T)json_value_get_as_number(array_last(self.values)); + data = (T)json_value_get_as_number(array_back(self.values)); return Error{}; } @@ -258,7 +258,7 @@ serialize(Json_Deserializer &self, bool &data) if (!self.is_valid) return Error{"[DESERIALIZER][JSON]: Please use Serialize_Pair, for e.x 'serialize(deserializer, {{\"a\", a}})'."}; - data = json_value_get_as_bool(array_last(self.values)); + data = json_value_get_as_bool(array_back(self.values)); return Error{}; } @@ -287,11 +287,11 @@ serialize(Json_Deserializer &self, T &data) if (!self.is_valid) return Error{"[DESERIALIZER][JSON]: Please use Serialize_Pair, for e.x 'serialize(deserializer, {{\"a\", a}})'."}; - Array array_values = json_value_get_as_array(array_last(self.values)); + Array array_values = json_value_get_as_array(array_back(self.values)); if (array_values.count != count_of(data)) return Error{"[DESERIALIZER][JSON]: Passed array count does not match the deserialized count."}; - for (u64 i = 0; i < array_values.count; ++i) + for (U64 i = 0; i < array_values.count; ++i) { array_push(self.values, array_values[i]); @@ -310,13 +310,13 @@ serialize(Json_Deserializer &self, Block &block) if (!self.is_valid) return Error{"[DESERIALIZER][JSON]: Please use Serialize_Pair, for e.x 'serialize(deserializer, {{\"a\", a}})'."}; - String str = json_value_get_as_string(array_last(self.values)); + String str = json_value_get_as_string(array_back(self.values)); String o = base64_decode(str, memory::temp_allocator()); DEFER(string_deinit(o)); if (block.data == nullptr) - block.data = (u8 *)memory::allocate(self.allocator, o.count); + block.data = memory::allocate(self.allocator, o.count); if (block.data == nullptr) return Error{"[DESERIALIZER][JSON]: Could not allocate memory for passed pointer type."}; @@ -340,9 +340,9 @@ serialize(Json_Deserializer &self, Array &data) data = array_init(self.allocator); } - Array array_values = 
json_value_get_as_array(array_last(self.values)); + Array array_values = json_value_get_as_array(array_back(self.values)); array_resize(data, array_values.count); - for (u64 i = 0; i < data.count; ++i) + for (U64 i = 0; i < data.count; ++i) { array_push(self.values, array_values[i]); if (Error error = serialize(self, data[i])) @@ -365,7 +365,7 @@ serialize(Json_Deserializer &self, String &data) data = string_init(self.allocator); } - String str = json_value_get_as_string(array_last(self.values)); + String str = json_value_get_as_string(array_back(self.values)); string_clear(data); string_append(data, str); @@ -401,10 +401,10 @@ serialize(Json_Deserializer &self, Hash_Table &data) data = hash_table_init(self.allocator); } - Array array_values = array_last(self.values).as_array; + Array array_values = array_back(self.values).as_array; hash_table_clear(data); - for (u64 i = 0; i < array_values.count; ++i) + for (U64 i = 0; i < array_values.count; ++i) { K key = {}; V value = {}; @@ -434,7 +434,7 @@ serialize(Json_Deserializer &self, const char *name, T &data) self.is_valid = true; DEFER(self.is_valid = false); - JSON_Value json_value = json_value_object_find(array_last(self.values), name); + JSON_Value json_value = json_value_object_find(array_back(self.values), name); if (!json_value) return Error{"[DESERIALIZER][JSON]: Could not find JSON value with the provided name."}; @@ -465,7 +465,7 @@ to_json(const T &data, memory::Allocator *allocator = memory::heap_allocator()) if (Error error = serialize(self, {"data", data})) return error; } - return json_value_to_string(array_first(self.values), allocator); + return json_value_to_string(array_front(self.values), allocator); } template diff --git a/core/source_location.h b/core/source_location.h index 2dd3b231..b887d8a0 100644 --- a/core/source_location.h +++ b/core/source_location.h @@ -6,11 +6,11 @@ struct Source_Location { const char *file_name; const char *function_name; - u32 line_number; + U32 line_number; }; inline 
static consteval Source_Location -source_location_get_current(const char *file_name = __builtin_FILE(), const char *function_name = __builtin_FUNCTION(), u32 line_number = __builtin_LINE()) +source_location_get_current(const char *file_name = __builtin_FILE(), const char *function_name = __builtin_FUNCTION(), U32 line_number = __builtin_LINE()) { return Source_Location { .file_name = file_name, diff --git a/core/tester.cpp b/core/tester.cpp index 68318f03..37c149c1 100644 --- a/core/tester.cpp +++ b/core/tester.cpp @@ -11,7 +11,7 @@ tester() return &instance; } -u64 +U64 tester_add_test(Tester *self, Tester_Test test) { array_push(self->tests, test); @@ -31,7 +31,7 @@ tester_run(Tester *self) print_to_stdout(PRINT_COLOR_BG_RED, " GLOBAL CHECK FAILED "); print_to_stdout("\n\n"); - for (u64 i = 0; i < self->failed_checks.count; i++) + for (U64 i = 0; i < self->failed_checks.count; i++) { const Tester_Check &check = self->failed_checks[i]; print_to_stdout(" "); @@ -46,13 +46,13 @@ tester_run(Tester *self) } } - u32 tests_passed = 0; - u32 tests_failed = 0; - for (u64 i = 0; i < self->tests.count; i++) + U32 tests_passed = 0; + U32 tests_failed = 0; + for (U64 i = 0; i < self->tests.count; i++) { - u64 failed_checks_before = self->failed_checks.count; + U64 failed_checks_before = self->failed_checks.count; self->tests[i].function(); - u64 failed_checks_after = self->failed_checks.count; + U64 failed_checks_after = self->failed_checks.count; if (failed_checks_after > failed_checks_before) { @@ -60,7 +60,7 @@ tester_run(Tester *self) print_to_stdout(PRINT_COLOR_BG_RED, " TEST FAILED "); print_to_stdout(" {}\n\n", self->tests[i].name); - for (u64 j = failed_checks_before; j < failed_checks_after; j++) + for (U64 j = failed_checks_before; j < failed_checks_after; j++) { const Tester_Check &check = self->failed_checks[j]; print_to_stdout(" "); diff --git a/core/tester.h b/core/tester.h index 2e49c0b3..9e041fe2 100644 --- a/core/tester.h +++ b/core/tester.h @@ -5,22 +5,26 @@ 
#include "core/print.h" #include "core/containers/array.h" -#define TESTER_TEST(name) \ - static void CONCATENATE(tester_test_function_, __LINE__)(); \ - static u64 CONCATENATE(registrar_, __LINE__) = tester_add_test(tester(), Tester_Test{name, CONCATENATE(tester_test_function_, __LINE__)}); \ - static void CONCATENATE(tester_test_function_, __LINE__)() - -#define TESTER_CHECK(expr) \ +#define TESTER_TEST(name) _TESTER_TEST_IMPL(name, __COUNTER__) +#define _TESTER_TEST_IMPL(name, id) \ + static void CONCATENATE(tester_test_function_, id)(); \ + static U64 CONCATENATE(registrar_, id) = tester_add_test(tester(), Tester_Test{name, CONCATENATE(tester_test_function_, id)}); \ + static void CONCATENATE(tester_test_function_, id)() + +// Variadic so callers can pass expressions containing braced initializers (whose +// top-level commas would otherwise be consumed by the preprocessor as argument +// separators). +#define TESTER_CHECK(...) \ do \ { \ - array_push((expr) ? tester()->passed_checks : tester()->failed_checks, Tester_Check{#expr, __FILE__, __LINE__}); \ + array_push((__VA_ARGS__) ? 
tester()->passed_checks : tester()->failed_checks, Tester_Check{#__VA_ARGS__, __FILE__, __LINE__}); \ } while (false) struct Tester_Check { const char *expression; const char *file; - u32 line; + U32 line; }; struct Tester_Test @@ -48,7 +52,7 @@ struct Tester CORE_API Tester * tester(); -CORE_API u64 +CORE_API U64 tester_add_test(Tester *self, Tester_Test test); CORE_API bool diff --git a/core/utils.h b/core/utils.h index 0c49b143..c17c5d81 100644 --- a/core/utils.h +++ b/core/utils.h @@ -2,9 +2,9 @@ #include "core/defines.h" -// TODO: Add variant for u32, u64, ..etc -inline static u64 -next_power_of_two(i32 value) +// TODO: Add variant for U32, U64, ..etc +inline static U64 +next_power_of_two(I32 value) { --value; value |= value >> 1; diff --git a/docs/containers.md b/docs/containers.md new file mode 100644 index 00000000..085f18aa --- /dev/null +++ b/docs/containers.md @@ -0,0 +1,369 @@ +# Containers + +All containers follow the same conventions: + +- **`_init`** — construct (may allocate). +- **`_deinit`** — destroy (free memory). +- **`_copy`** — shallow copy into a new container (element-wise assignment; nested pointers/allocations are not duplicated). +- **`clone()`** — deep copy; recursively invokes `clone()` on each class-type element. Requires a `clone()` overload for custom element types. +- Free functions take the container as the first argument. +- Every container that owns memory accepts a `memory::Allocator *` (defaults to `heap_allocator()`). + +--- + +## Array\ + +**Header:** `core/containers/array.h` + +A heap-allocated dynamic array. Owns its memory. 
+ +```cpp +#include +#include + +auto arr = array_init(); +DEFER(array_deinit(arr)); + +array_push(arr, 10); +array_push(arr, 20); +array_push(arr, 30); + +for (int x : arr) // range-based for supported + print_to(stdout, "{}\n", x); + +arr[0] = 99; // bounds-checked in Debug +``` + +### Construction + +| Function | Description | +|---|---| +| `array_init(allocator)` | Empty, zero capacity | +| `array_init_with_capacity(n, allocator)` | Empty, pre-allocated capacity | +| `array_init_with_count(n, allocator)` | Count == capacity, uninitialized | +| `array_init_from(first, last, allocator)` | Copy from pointer range | +| `array_init_from({1,2,3}, allocator)` | Copy from initializer list | +| `array_copy(arr, allocator)` | Shallow copy (element-wise assignment) | +| `clone(arr, allocator)` | Deep copy (recursively clones class elements) | + +### Modification + +| Function | Description | +|---|---| +| `array_push(arr, value)` | Append element | +| `array_push(arr, value, count)` | Append same value N times | +| `array_pop(arr)` | Remove and return last element | +| `array_remove(arr, index)` | Swap-remove (O(1), unordered) | +| `array_remove_if(arr, pred)` | Swap-remove matching elements | +| `array_remove_ordered(arr, index)` | Ordered remove (O(n)) | +| `array_append(arr, other)` | Append all elements of another array | +| `array_fill(arr, value)` | Set all elements to value | +| `array_clear(arr)` | Set count = 0 (no deallocation) | +| `array_resize(arr, n)` | Resize count (grows if needed) | +| `array_reserve(arr, extra)` | Reserve additional capacity | + +### Query + +| Function | Description | +|---|---| +| `array_is_empty(arr)` | `count == 0` | +| `array_front(arr)` | Reference to first element | +| `array_back(arr)` | Reference to last element | + +--- + +## Stack\_Array\ + +**Header:** `core/containers/stack_array.h` + +A fixed-capacity array stored entirely on the stack. No allocation, no `_deinit` needed. 
+ +```cpp +#include + +Stack_Array arr{}; +stack_array_push(arr, 1); +stack_array_push(arr, 2); + +int last = stack_array_pop(arr); // 2 +stack_array_clear(arr); +``` + +Supports range-based `for` via `begin()` / `end()`. + +### Functions + +| Function | Description | +|---|---| +| `stack_array_push(arr, value)` | Append element (asserts if at capacity) | +| `stack_array_pop(arr)` | Remove and return last element | +| `stack_array_clear(arr)` | Set count = 0 | + +--- + +## Span\ + +**Header:** `core/containers/span.h` + +A **non-owning view** over a contiguous sequence. Never allocates. No `_deinit`. + +```cpp +#include + +Array arr = array_init_from({1, 2, 3, 4, 5}); +DEFER(array_deinit(arr)); + +Span view = span_init(arr); // mutable view +Span cview = span_init((const Array&)arr); // read-only view +``` + +### Construction + +| Function | Returns | Description | +|---|---|---| +| `span_init(T *data, u64 count)` | `Span` | From pointer + count | +| `span_init(T *first, T *last)` | `Span` | From pointer pair | +| `span_init(T (&arr)[N])` | `Span` | From C array | +| `span_init(Array &)` | `Span` | Mutable view of Array | +| `span_init(const Array &)` | `Span` | Read-only view of Array | +| `span_init(Stack_Array &)` | `Span` | Mutable view of Stack\_Array | +| `span_init(const char *)` | `Span` | View of a C string (no null) | +| `span_init({1,2,3})` | `Span` | From initializer\_list — **only safe as a function argument**, never store in a variable | + +### Functions + +| Function | Description | +|---|---| +| `span_is_empty(span)` | `count == 0` | +| `span_first(span)` | Reference to first element | +| `span_last(span)` | Reference to last element | + +Supports range-based `for` via `begin()` / `end()`. + +> **Lifetime rule for initializer\_list:** The backing array of `std::initializer_list` is a temporary. It lives only for the duration of the enclosing full-expression. 
Never store a `Span` constructed from `{}` in a named variable — pass it directly as a function argument. + +--- + +## String + +**Header:** `core/containers/string.h` + +`String` is a typedef for `Array`. It always carries a null terminator at `data[count]` (not counted in `count`). + +```cpp +#include + +String s = string_from("hello"); +DEFER(string_deinit(s)); + +string_append(s, " world"); +print_to(stdout, "{}\n", s.data); // "hello world" +``` + +### Construction + +| Function | Description | +|---|---| +| `string_init(allocator)` | Empty string with null terminator | +| `string_from(c_string, allocator)` | Copy from `const char *` | +| `string_from(first, last, allocator)` | Copy from pointer range | +| `string_literal(c_string)` | Non-owning view (no allocation, no `_deinit`) | +| `string_copy(str, allocator)` | Copy into a new allocation (byte-for-byte) | + +### Common operations + +```cpp +string_append(s, other); // append String +string_append(s, "suffix"); // append c-string +string_push(s, 'x'); // append single char +string_clear(s); // reset count, keep allocation +u64 len = string_length(s); // same as s.count +bool eq = string_equal(s, other); +``` + +--- + +## Hash\_Table\ + +**Header:** `core/containers/hash_table.h` + +An open-addressing hash table with tombstone deletion. 
+ +```cpp +#include + +auto table = hash_table_init(); +DEFER(hash_table_deinit(table)); + +hash_table_insert(table, string_literal("apples"), 5); +hash_table_insert(table, string_literal("bananas"), 3); + +if (auto *entry = hash_table_find(table, string_literal("apples"))) + print_to(stdout, "apples: {}\n", entry->value); + +hash_table_remove(table, string_literal("apples")); +``` + +Operator `[]` also works and inserts a default value if the key is absent: + +```cpp +table[string_literal("pears")] = 7; +``` + +Iterate with range-based `for` — yields `Hash_Table_Entry` references: + +```cpp +for (auto &entry : table) + print_to(stdout, "{} = {}\n", entry.key.data, entry.value); +``` + +Custom types need a `hash()` overload — see [Hash](hash.md). + +### Construction + +| Function | Description | +|---|---| +| `hash_table_init(allocator)` | Empty table | +| `hash_table_init_with_capacity(n, allocator)` | Pre-allocated capacity | +| `hash_table_init_from({...}, allocator)` | From initializer list of `Hash_Table_Entry` | +| `hash_table_copy(table, allocator)` | Shallow copy | +| `clone(table, allocator)` | Deep copy (recursively clones class-type keys and values) | + +### Functions + +| Function | Description | +|---|---| +| `hash_table_insert(table, key, value)` | Insert or update | +| `hash_table_find(table, key)` | Returns `const Hash_Table_Entry *` or `nullptr` | +| `hash_table_contains(table, key)` | `true` if key exists | +| `hash_table_remove(table, key)` | Swap-remove (O(1), entry order not preserved) | +| `hash_table_remove_ordered(table, key)` | Ordered remove (O(n), preserves insertion order) | +| `hash_table_reserve(table, extra)` | Reserve additional capacity | +| `hash_table_clear(table)` | Remove all entries (keep allocation) | +| `destroy(table)` | Calls `destroy()` on class-type keys/values, then deinits | + +--- + +## Hash\_Set\ + +**Header:** `core/containers/hash_set.h` + +A `Hash_Table` alias. Same API minus values. 
+ +```cpp +#include + +auto set = hash_set_init(); +DEFER(hash_set_deinit(set)); + +hash_set_insert(set, 42); +hash_set_insert(set, 99); + +bool has = hash_set_contains(set, 42); // true +hash_set_remove(set, 42); +``` + +### Construction + +| Function | Description | +|---|---| +| `hash_set_init(allocator)` | Empty set | +| `hash_set_init_with_capacity(n, allocator)` | Pre-allocated capacity | +| `hash_set_init_from({...}, allocator)` | From initializer list | +| `hash_set_copy(set, allocator)` | Shallow copy | +| `clone(set, allocator)` | Deep copy (recursively clones class-type keys) | + +### Functions + +| Function | Description | +|---|---| +| `hash_set_insert(set, key)` | Insert (no-op if already present) | +| `hash_set_find(set, key)` | Returns `const K *` or `nullptr` | +| `hash_set_contains(set, key)` | `true` if key exists | +| `hash_set_remove(set, key)` | Swap-remove (O(1)) | +| `hash_set_remove_ordered(set, key)` | Ordered remove (O(n)) | +| `hash_set_reserve(set, extra)` | Reserve additional capacity | +| `hash_set_clear(set)` | Remove all entries (keep allocation) | +| `destroy(set)` | Calls `destroy()` on class-type keys, then deinits | + +--- + +## String\_Interner + +**Header:** `core/containers/string_interner.h` + +Deduplicates strings — equal strings are stored once and return the same `const char *` pointer. Pointer equality replaces string comparison. + +```cpp +#include + +String_Interner interner = string_interner_init(memory::heap_allocator()); +DEFER(string_interner_deinit(interner)); + +const char *a = string_interner_intern(interner, "hello"); +const char *b = string_interner_intern(interner, "hello"); + +assert(a == b); // same pointer +``` + +Also accepts a pointer range: + +```cpp +const char *s = string_interner_intern(interner, first, last); +``` + +--- + +## Ring\_Buffer\ + +**Header:** `core/containers/ring_buffer.h` + +A heap-allocated growable double-ended circular buffer. Supports efficient push and pop from both ends. 
Grows automatically (×1.5) by linearising its internal layout on reallocation. + +Indexed access via `rb[i]` operates in logical order (`0` = front element) regardless of the internal `head` offset. + +```cpp +#include + +auto rb = ring_buffer_init(); +DEFER(ring_buffer_deinit(rb)); + +ring_buffer_push_back(rb, 1); +ring_buffer_push_back(rb, 2); +ring_buffer_push_back(rb, 3); + +ring_buffer_pop_front(rb); // remove from front (FIFO) + +int front = ring_buffer_front(rb); // 2 +int back = ring_buffer_back(rb); // 3 +``` + +> Range-based `for` is **not** supported — the data is circular and raw pointer iteration would yield incorrect results. +> Use indexed access instead: `for (u64 i = 0; i < rb.count; ++i) rb[i]` + +### Construction + +| Function | Description | +|---|---| +| `ring_buffer_init(allocator)` | Empty, zero capacity | +| `ring_buffer_copy(rb, allocator)` | Shallow copy, linearised (`head = 0`) | +| `clone(rb, allocator)` | Deep copy (recursively clones class-type elements) | + +### Modification + +| Function | Description | +|---|---| +| `ring_buffer_push_back(rb, value)` | Append element at back | +| `ring_buffer_push_front(rb, value)` | Prepend element at front | +| `ring_buffer_pop_front(rb)` | Remove element from front | +| `ring_buffer_pop_back(rb)` | Remove element from back | +| `ring_buffer_reserve(rb, extra)` | Reserve additional capacity (linearises on reallocation) | +| `ring_buffer_clear(rb)` | Set count = 0, reset head (no deallocation) | + +### Query + +| Function | Description | +|---|---| +| `ring_buffer_front(rb)` | Reference to front element (`rb[0]`) | +| `ring_buffer_back(rb)` | Reference to back element (`rb[count-1]`) | +| `ring_buffer_is_empty(rb)` | `count == 0` | diff --git a/docs/defer.md b/docs/defer.md new file mode 100644 index 00000000..6e3675c3 --- /dev/null +++ b/docs/defer.md @@ -0,0 +1,42 @@ +# Defer + +**Header:** `core/defer.h` + +`DEFER(code)` schedules `code` to run at the end of the current scope, regardless of how 
it exits. It is a scope-guard implemented as an RAII lambda wrapper. + +--- + +## Usage + +```cpp +#include +#include + +void process() +{ + auto arr = array_init(); + DEFER(array_deinit(arr)); // runs when process() returns + + array_push(arr, 1); + array_push(arr, 2); + // arr is freed here automatically +} +``` + +Multiple defers run in **reverse order** (last-in, first-out): + +```cpp +FILE *f = fopen("data.bin", "rb"); +DEFER(fclose(f)); + +auto *arena = memory::arena_allocator_init(); +DEFER(memory::arena_allocator_deinit(arena)); // runs first on exit +``` + +--- + +## Notes + +- Uses `__COUNTER__` internally — safe in unity builds. +- Captures by reference (`[&]`) — be cautious with loop variables. +- No heap allocation — the lambda is stored on the stack. diff --git a/docs/ecs.md b/docs/ecs.md new file mode 100644 index 00000000..157d3560 --- /dev/null +++ b/docs/ecs.md @@ -0,0 +1,81 @@ +# ECS (Entity Component System) + +**Header:** `core/ecs.h` + +A minimal, type-safe ECS built on `Hash_Table` and `Pool_Allocator`. + +--- + +## Entities + +```cpp +#include + +ecs::Entity e = ecs::entity_new(); + +if (e) // Entity is valid + ... + +e.id // underlying u64 — U64_MAX means invalid +``` + +--- + +## Components + +Define any struct as a component — no base class or macro required: + +```cpp +struct Transform { float x, y, z; }; +struct Health { int current, max; }; +``` + +--- + +## World + +```cpp +ecs::World world = ecs::world_init(); +DEFER(ecs::world_deinit(world)); +``` + +--- + +## Adding & Accessing Components + +```cpp +ecs::Entity player = ecs::entity_new(); + +ecs::world_add_component(world, player, Transform{0.f, 0.f, 0.f}); +ecs::world_add_component(world, player, Health{100, 100}); + +// Write +Transform *t = ecs::world_get_component(world, player); +t->x += 1.f; + +// Read-only +const Health *h = ecs::world_get_component(world, player); +``` + +Returns `nullptr` if the entity doesn't have that component. 
+ +--- + +## Removing Components + +```cpp +ecs::world_remove_component(world, player); +``` + +--- + +## Iterating + +```cpp +ecs::world_for_each(world, [](ecs::Entity e, Transform &t, Health &h) { + t.y -= 9.8f; + h.current -= 1; +}); +``` + +Only entities that have **all** listed component types are visited. diff --git a/docs/formatter.md b/docs/formatter.md new file mode 100644 index 00000000..798763ae --- /dev/null +++ b/docs/formatter.md @@ -0,0 +1,107 @@ +# Formatter + +**Header:** `core/formatter.h` + +Type-safe string formatting using `{}` placeholders. No `printf`-style format strings — the type is known at compile time. + +--- + +## `format()` + +Returns a `String` (heap-allocated by default): + +```cpp +#include + +String s = format("Hello, {}! You are {} years old.", name, age); +DEFER(string_deinit(s)); +``` + +Pass an explicit allocator as the **last** argument: + +```cpp +String s = format("x = {}", x, memory::temp_allocator()); +``` + +--- + +## Format Specifiers + +Specifiers go inside the braces: `{specifier}` or `{specifier:options}`. 
+ +| Specifier | Meaning | Example | +|---|---|---| +| _(none)_ | Default representation | `{}` | +| `d` | Decimal integer | `{d}` | +| `x` | Hex lowercase | `{x}` → `ff` | +| `X` | Hex uppercase | `{X}` → `FF` | +| `b` | Binary | `{b}` → `1010` | +| `o` | Octal | `{o}` | +| `p` | Pointer | `{p}` | +| `c` | Character | `{c}` | + +### Width & Alignment + +```cpp +format("{<10}", "left"); // left-aligned in 10 chars +format("{>10}", "right"); // right-aligned +format("{^10}", "center"); // centered +format("{010d}", 42); // zero-padded: "0000000042" +``` + +### Precision (floats) + +```cpp +format("{.2}", 3.14159f); // "3.14" +format("{.4}", 3.14159f); // "3.1416" +``` + +--- + +## `Formatter` — Incremental Building + +Use `Formatter` when building a string in multiple steps: + +```cpp +#include + +Formatter fmt = formatter_init(); +DEFER(formatter_deinit(fmt)); + +format_value(fmt, "Name: "); +format_value(fmt, player.name); +format_value(fmt, ", Score: "); +format_value(fmt, player.score); + +print_to(stdout, "{}\n", fmt.buffer.data); +``` + +--- + +## Custom Type Formatting + +Add a `format_value(Formatter &, const T &, Format_Options)` overload: + +```cpp +struct Vec3 { float x, y, z; }; + +inline static void +format_value(Formatter &fmt, const Vec3 &v, Format_Options options = {}) +{ + format_value(fmt, '('); + format_value(fmt, v.x, options); + format_value(fmt, ", "); + format_value(fmt, v.y, options); + format_value(fmt, ", "); + format_value(fmt, v.z, options); + format_value(fmt, ')'); +} +``` + +Then: + +```cpp +Vec3 pos = {1.0f, 2.0f, 3.0f}; +String s = format("pos = {}", pos); +// "pos = (1.000000, 2.000000, 3.000000)" +``` diff --git a/docs/hash.md b/docs/hash.md new file mode 100644 index 00000000..b88e31a7 --- /dev/null +++ b/docs/hash.md @@ -0,0 +1,48 @@ +# Hash + +**Header:** `core/hash.h` + +Generic hashing used by `Hash_Table` and `Hash_Set`. 
+ +--- + +## Built-in Overloads + +All primitive types are covered out of the box: `bool`, `char`, `i8`–`i64`, `u8`–`u64`, `f32`, `f64`, raw pointers. + +For strings (`String` / `const char *`) there are also overloads producing content-based hashes. + +--- + +## Custom Type + +Add an overload of `hash(const T &)` returning `u64`: + +```cpp +struct Vec2 { float x, y; }; + +inline static u64 +hash(const Vec2 &v) +{ + u64 h = hash(v.x); + h ^= hash(v.y) + 0x9e3779b9 + (h << 6) + (h >> 2); + return h; +} +``` + +Once defined, `Vec2` can be used as a `Hash_Table` key: + +```cpp +auto table = hash_table_init(); +hash_table_insert(table, Vec2{1.f, 2.f}, 42); +``` + +--- + +## Raw FNV hash + +For hashing arbitrary bytes directly: + +```cpp +u64 h = hash_fnv_x32(data_ptr, byte_count); +``` diff --git a/docs/home.md b/docs/home.md new file mode 100644 index 00000000..4ba7b03d --- /dev/null +++ b/docs/home.md @@ -0,0 +1,57 @@ +# Core — Documentation + +**Core** is a C-like C++20 library providing foundational utilities for data-oriented programming. It replaces the STL with a simpler, more explicit set of containers, allocators, and utilities. 
+ +--- + +## Modules + +| Module | Header | Description | +|---|---|---| +| [Memory & Allocators](memory.md) | `core/memory/memory.h` | Allocator interface, heap, arena, pool, temp allocators | +| [Containers](containers.md) | `core/containers/` | Array, Stack\_Array, Span, String, Hash\_Table, Hash\_Set, String\_Interner | +| [Formatter](formatter.md) | `core/formatter.h` | `format()` / `Formatter` — type-safe string formatting | +| [Print & Log](print-log.md) | `core/print.h`, `core/log.h` | Colored output, log levels | +| [Defer](defer.md) | `core/defer.h` | RAII scope-exit macro | +| [Validate](validate.md) | `core/validate.h` | Runtime assertions with source location | +| [Result & Error](result.md) | `core/result.h` | Error-returning pattern without exceptions | +| [Hash](hash.md) | `core/hash.h` | FNV-32 and type-generic `hash()` overloads | +| [Reflect](reflect.md) | `core/reflect.h` | Compile-time type reflection: kinds, names, fields, enums | +| [Serialization](serialization.md) | `core/serialization/` | Binary and JSON serializers | +| [ECS](ecs.md) | `core/ecs.h` | Minimal entity-component system | +| [Platform](platform.md) | `core/platform/platform.h` | File I/O, paths, dialogs | + +--- + +## Quick Start + +```cmake +# CMakeLists.txt +add_subdirectory(core) +target_link_libraries(my_app PRIVATE core) +``` + +```cpp +#include +#include + +auto arr = array_init(); +DEFER(array_deinit(arr)); + +array_push(arr, 1); +array_push(arr, 2); +array_push(arr, 3); + +for (int x : arr) + print_to(stdout, "{}\n", x); +``` + +--- + +## Design Philosophy + +- **C-like style** — free functions over methods, explicit over implicit. +- **No hidden allocations** — every container takes an `Allocator *`. +- **No exceptions** — errors are returned as `Error` values. +- **No RTTI** — compile-time reflection via `reflect.h`. +- **Explicit lifetimes** — `_init` / `_deinit` pairs, `DEFER` for cleanup. 
diff --git a/docs/math.md b/docs/math.md new file mode 100644 index 00000000..b4090815 --- /dev/null +++ b/docs/math.md @@ -0,0 +1,224 @@ +# Math + +Header-only linear-algebra + scalar math primitives. Type-prefixed free functions (`f32_sqrt`, `f32x3_dot`, `f32x4x4_look_at`); no namespaces, no templates outside the generic container types that happen to use math internally. + +**Headers:** per-type, included individually (`core/math/f32.h`, `core/math/f32x3.h`, `core/math/f32x4x4.h`, `core/math/quaternion.h`, `core/math/random.h`, etc.). Pull in exactly what each translation unit uses. + +--- + +## Canonical coordinate convention + +Consistent across every platform. Documented once; enforced by the API. + +| Property | Value | +|---|---| +| Handedness | Right-handed | +| World axes | `+X` right, `+Y` up, `+Z` toward the viewer | +| Axis constants | `F32X3_RIGHT = {1,0,0}`, `F32X3_UP = {0,1,0}`, `F32X3_FORWARD = {0,0,-1}` | +| Matrix storage | Row-major. `F32x4x4` layout is `[m00 m01 m02 m03 \| m10 m11 ... \| ...]` | +| Multiplication | **Row-vector**: `v * M`, not `M * v`. Translation lives in the last row. | +| Angle units | Radians unless the parameter name ends in `_degrees` | +| Clip / NDC | Y-up, Z in `[0, 1]` (Metal / D3D-native) | +| UV origin | Bottom-left | + +### CPU ↔ GPU matrix layout (transpose duality) + +Matrices upload to GPU buffers **as-is** — no transpose on upload, no `layout(row_major)` annotation on shader uniforms. This works because: + +- CPU: row-major storage + row-vector multiply (`clip = position * mvp`). +- GPU: GLSL default is column-major storage + column-vector multiply (`clip = mvp * position`). + +The "row-major vs column-major" memory reinterpretation and the "row-vector vs column-vector" multiplication order cancel exactly — same memory, same result. + +**Do not** add `layout(row_major)` to shader matrix uniforms. It would re-introduce the mismatch. + +--- + +## Type reference + +All types are POD. 
No constructors, no destructors. Initialize via aggregate init: `F32x3{1.0f, 2.0f, 3.0f}`. + +### Vectors + +| Type | Lanes | Storage | Notes | +|---|---|---|---| +| `F32x2` | 2 × F32 | 8 B, scalar | | +| `F32x3` | 3 × F32 | 12 B packed | GPU-attribute interop | +| `F32x4` | 4 × F32 | 16 B SIMD-backed | `alignas(16)` | +| `F64x2` | 2 × F64 | 16 B SIMD-backed | `alignas(16)` | +| `F64x3` | 3 × F64 | 24 B packed | | +| `F64x4` | 4 × F64 | 32 B SIMD-backed | `alignas(32)`, one `__m256d` on AVX | +| `I32x2` / `I32x3` | scalar | packed | texture coords, grid indices | +| `I32x4` | 4 × I32 | 16 B SIMD-backed | SIMD masks, packed flags | +| `U32x2` / `U32x3` | scalar | packed | | +| `U32x4` | 4 × U32 | 16 B SIMD-backed | | + +### Matrices + +| Type | Size | Storage | Notes | +|---|---|---|---| +| `F32x2x2` | 16 B | scalar | Row-major `[m00 m01 / m10 m11]` | +| `F32x3x3` | 48 B | SIMD, 3 padded rows | Matches `std140` / MSL `matrix_float3x3` | +| `F32x4x4` | 64 B | SIMD, 4 rows | std140-compatible | +| `F64x2x2` | 32 B | scalar | | +| `F64x3x3` | 96 B | SIMD, 3 padded rows | | +| `F64x4x4` | 128 B | SIMD, 4 rows | | + +### Other + +| Type | Description | +|---|---| +| `Quaternion` | `{w, x, y, z}` unit-quaternion rotation | +| `Random` | Explicit-state xoshiro256** PRNG (256-bit state) | + +### Constants + +| Scope | Constants | +|---|---| +| Angular (F32) | `F32_PI`, `F32_TAU`, `F32_PI_OVER_2`, `F32_TO_DEGREES`, `F32_TO_RADIANS` | +| Angular (F64) | `F64_PI`, `F64_TAU`, `F64_PI_OVER_2`, `F64_TO_DEGREES`, `F64_TO_RADIANS` | +| Special values | `F32_EPSILON`, `F32_INFINITY`, `F32_NEG_INFINITY`, `F32_NAN` (+ `F64_*` mirror) | +| Limits | `F32_MIN`, `F32_MAX`, `I32_MIN`, `I32_MAX`, `U32_MAX`, ... 
(in `core/defines.h`) | +| Axis constants | `F32X3_RIGHT`, `F32X3_UP`, `F32X3_FORWARD`, `F32X3_LEFT`, `F32X3_DOWN`, `F32X3_BACKWARD`, `F32X3_ZERO`, `F32X3_ONE` | +| Identities | `QUATERNION_IDENTITY` | + +--- + +## Usage + +### Building a view-projection matrix and transforming a point + +```cpp +#include +#include + +F32x3 eye = {0.0f, 0.0f, 5.0f}; +F32x3 target = F32X3_ZERO; + +F32x4x4 view = f32x4x4_look_at(eye, target, F32X3_UP); +F32x4x4 proj = f32x4x4_perspective(60.0f * F32_TO_RADIANS, 16.0f / 9.0f, 0.1f, 1000.0f); +F32x4x4 vp = view * proj; + +F32x4 world_pos = {1.0f, 2.0f, 3.0f, 1.0f}; +F32x4 clip_pos = world_pos * vp; // row-vector convention +``` + +### Composing a TRS transform + +```cpp +F32x3 translation = {5.0f, 0.0f, 0.0f}; +Quaternion rotation = quaternion_from_axis_angle(F32X3_UP, 45.0f * F32_TO_RADIANS); +F32x3 scale = {2.0f, 2.0f, 2.0f}; + +// Apply in the order "scale first, then rotate, then translate" — reads left-to-right. +F32x4x4 trs = f32x4x4_scaling(scale) + * f32x4x4_from_quaternion(rotation) + * f32x4x4_translation(translation); + +// Decompose back. +F32x3 out_t; +Quaternion out_r; +F32x3 out_s; +bool ok = f32x4x4_decompose(trs, &out_t, &out_r, &out_s); +``` + +### Rotating a vector by a quaternion + +```cpp +Quaternion q = quaternion_from_axis_angle(F32X3_UP, F32_PI_OVER_2); + +F32x3 rotated = F32X3_RIGHT * q; // 90° yaw → forward direction +// rotated ≈ F32X3_FORWARD +``` + +### Interpolating + +```cpp +// Scalar +F32 brightness = f32_lerp(0.0f, 1.0f, t); +F32 smoothed = f32_smoothstep(0.2f, 0.8f, t); + +// Vector +F32x3 pos = f32x3_lerp(start, end, t); + +// Rotation — always use slerp, not lerp. +Quaternion r = quaternion_slerp(start_orientation, end_orientation, t); + +// Camera follow with critical damping. 
+static F32 velocity = 0.0f; +camera.distance = f32_smooth_damp(camera.distance, target_distance, &velocity, 0.2f, dt); +``` + +### Screen-space projection / picking + +```cpp +F32x4 viewport = {0.0f, 0.0f, (F32)window_w, (F32)window_h}; + +// World → screen. +F32x3 screen = f32x3_project(world_pos, vp, viewport); + +// Mouse click → pick ray. +F32x4x4 vp_inv = f32x4x4_inverse(vp); +F32x3 near_pt = f32x3_unproject(F32x3{mouse_x, mouse_y, 0.0f}, vp_inv, viewport); +F32x3 far_pt = f32x3_unproject(F32x3{mouse_x, mouse_y, 1.0f}, vp_inv, viewport); +F32x3 ray_dir = f32x3_normalize(far_pt - near_pt); +``` + +### Integer vectors + +```cpp +U32x2 texture_size = {1920u, 1080u}; +I32x3 cell = {grid_x, grid_y, grid_z}; + +U32x2 clamped = u32x2_min(texture_size, U32x2{4096u, 4096u}); +``` + +### Random + +```cpp +Random rng = random_from_seed(0xDEADBEEF); + +F32 roll = f32_random_range(rng, 0.0f, 1.0f); +I32 die = i32_random_range(rng, 1, 6); +F32x3 in_sphere = f32x3_random_in_unit_sphere(rng); +F32x3 on_surface = f32x3_random_on_unit_sphere(rng); +Quaternion q = quaternion_random(rng); +``` + +Seeded and explicit-state — deterministic for replays, network sync, tests. No `srand()`-style hidden global. + +### Formatted logging + +Math types format natively via `core/formatter.h` (transitively available through `core/print.h` and `core/log.h`): + +```cpp +log_info("Camera pos: {}", camera.position); // "{0.0, 1.5, 10.0}" +log_info("View matrix:\n {}", view); // row-by-row +log_info("Rotation: {}", orientation); // "{w=1.0, x=0.0, y=0.0, z=0.0}" +log_warning("Dirty texel {}", texel); // "{10, 20}" +``` + +--- + +## SIMD + +The 4-wide types (`F32x4`, `F32x3x3`, `F32x4x4`, `F64x2`, `F64x4`, `F64x3x3`, `F64x4x4`, `I32x4`, `U32x4`) are SIMD-backed. Call sites don't see the SIMD details — the storage is a union of scalar fields and the SIMD register, and ops dispatch at compile time. 
+ +| Arch | Baseline | Wrapper types | +|---|---|---| +| ARM64 (Apple Silicon) | NEON | `float32x4_t`, `float64x2_t`, `int32x4_t`, ... | +| x86_64 (Windows/Linux) | AVX (Sandy Bridge 2011+) | `__m128`, `__m128d`, `__m256d`, `__m128i` | + +CMake sets `SIMD_NEON=1` or `SIMD_AVX=1` automatically. `-DCORE_SIMD_FORCE_SCALAR=ON` falls back to the scalar path for parity testing — same code, same results (within 1e-6 for non-associative ops like matmul). + +Anything beyond the baseline (AVX2, AVX-512) is not in scope. The math library targets CPUs from 2011+. + +--- + +## Field conventions + +- All ops are free functions prefixed by the type: `f32x3_dot`, `f32x4x4_inverse`, `quaternion_slerp`. +- No hidden state. `f32_random_*` / `quaternion_random` take an explicit `Random &`. +- Angle parameters are radians unless named `_degrees`. +- `*_approx_equal(a, b, epsilon)` — always requires an explicit epsilon (no magic default). +- `*_length_squared` — name fixed from the older "norm" which was misleading (it returned the squared length, not the length). diff --git a/docs/memory.md b/docs/memory.md new file mode 100644 index 00000000..05c31ca5 --- /dev/null +++ b/docs/memory.md @@ -0,0 +1,116 @@ +# Memory & Allocators + +**Header:** `core/memory/memory.h` + +All containers and most utilities accept a `memory::Allocator *`. This makes the source of every allocation explicit and swappable. + +--- + +## Allocator Interface + +```cpp +namespace memory { + struct Allocator { + virtual void *allocate(u64 size) = 0; + virtual void deallocate(void *data) = 0; + virtual void clear() {} // optional — only arena supports it + }; +} +``` + +--- + +## Built-in Allocators + +### Heap Allocator + +Wraps `malloc` / `free`. The default for all containers. 
+ +```cpp +memory::Allocator *alloc = memory::heap_allocator(); +void *ptr = memory::allocate(alloc, 1024); +memory::deallocate(alloc, ptr); +``` + +### Temp (Scratch) Allocator + +A per-thread arena that is intended to be cleared every frame / tick. Use it for short-lived strings and intermediate buffers. **Never store pointers from it across frames.** + +```cpp +String msg = format("Hello {}!", name, memory::temp_allocator()); +// msg.data is valid until temp_allocator is cleared +``` + +### Arena Allocator + +Bump-pointer allocator. `deallocate` is a no-op — memory is reclaimed all at once with `clear()` or `deinit`. Default capacity is 4 MB. + +```cpp +#include + +auto *arena = memory::arena_allocator_init(); // 4 MB default +// or: memory::arena_allocator_init(64 * 1024 * 1024); // 64 MB + +auto arr = array_init(arena); +array_push(arr, 42); + +memory::arena_allocator_clear(arena); // reclaim all at once +memory::arena_allocator_deinit(arena); // free the arena itself +``` + +Key functions: + +| Function | Description | +|---|---| +| `arena_allocator_init(capacity, backing)` | Create arena with given capacity | +| `arena_allocator_deinit(arena)` | Destroy arena | +| `arena_allocator_clear(arena)` | Reset offset to 0 (reuse memory) | +| `arena_allocator_get_used_size(arena)` | Bytes currently in use | +| `arena_allocator_get_peak_size(arena)` | Peak usage since last clear | + +### Pool Allocator + +Fixed-size chunk allocator. All chunks are the same size. O(1) alloc and dealloc. + +```cpp +#include + +// Pool of 256 chunks, each 64 bytes +auto *pool = memory::pool_allocator_init(64, 256); + +void *chunk = memory::pool_allocator_allocate(pool); +// ... use chunk ... 
+memory::pool_allocator_deallocate(pool, chunk); +memory::pool_allocator_deinit(pool); +``` + +--- + +## Typed helpers + +`memory.h` provides typed wrappers for convenience: + +```cpp +// Allocate sizeof(T) bytes +MyStruct *s = memory::allocate(allocator); + +// Allocate + call constructor +MyStruct *s = memory::allocate_and_call_constructor(allocator, arg1, arg2); + +// Allocate zeroed +void *p = memory::allocate_zeroed(allocator, size); +``` + +--- + +## Custom Allocator + +Inherit from `memory::Allocator` and implement `allocate` / `deallocate`: + +```cpp +struct My_Allocator : memory::Allocator +{ + void *allocate(u64 size) override { return my_malloc(size); } + void deallocate(void *data) override { my_free(data); } +}; +``` diff --git a/docs/platform.md b/docs/platform.md new file mode 100644 index 00000000..8201c6e9 --- /dev/null +++ b/docs/platform.md @@ -0,0 +1,67 @@ +# Platform + +**Header:** `core/platform/platform.h` + +Cross-platform file I/O, path utilities, and native dialogs. Implementations provided for Windows, Linux, and macOS. + +--- + +## File I/O + +```cpp +#include + +// Read entire file into a String +String contents = platform_file_read("data/config.json"); +DEFER(string_deinit(contents)); + +// With explicit allocator +String contents = platform_file_read("shader.glsl", memory::temp_allocator()); +``` + +--- + +## Path Utilities + +```cpp +bool valid = platform_path_is_valid("assets/textures"); +bool is_file = platform_path_is_file("assets/albedo.png"); +bool is_dir = platform_path_is_directory("assets/"); +``` + +All functions accept both `String` and `const char *`. 
+ +--- + +## Dialogs + +```cpp +// Open file picker — returns chosen path or empty string +String path = platform_dialog_file_open("Open Scene", "*.scene"); +DEFER(string_deinit(path)); + +if (!string_is_empty(path)) + load_scene(path); +``` + +```cpp +// Save file picker +String path = platform_dialog_file_save("Save Scene", "*.scene"); +``` + +--- + +## Platform Macros + +The build system defines these so you can conditionally compile: + +| Macro | Platform | +|---|---| +| `PLATFORM_WINDOWS` | Windows | +| `PLATFORM_LINUX` | Linux | +| `PLATFORM_MACOS` | macOS | +| `COMPILER_MSVC` | MSVC | +| `COMPILER_CLANG` | Clang / Apple Clang | +| `COMPILER_GCC` | GCC | +| `DEBUG` | Debug / RelWithDebInfo builds | +| `RELEASE` | Release / MinSizeRel builds | diff --git a/docs/print-log.md b/docs/print-log.md new file mode 100644 index 00000000..ba400cc3 --- /dev/null +++ b/docs/print-log.md @@ -0,0 +1,59 @@ +# Print & Log + +--- + +## Print + +**Header:** `core/print.h` + +`print_to` writes a formatted string to a `FILE *` or a custom callback. + +```cpp +#include + +print_to(stdout, "Hello, {}!\n", name); +print_to(stderr, "Error: {}\n", message); + +// With color +print_to(stdout, PRINT_COLOR_FG_GREEN, "OK\n"); +``` + +### Colors + +| Constant | Appearance | +|---|---| +| `PRINT_COLOR_DEFAULT` | Terminal default | +| `PRINT_COLOR_FG_RED` | Red text | +| `PRINT_COLOR_FG_GREEN` | Green text | +| `PRINT_COLOR_FG_YELLOW` | Yellow text | +| `PRINT_COLOR_FG_BLUE` | Blue text | +| `PRINT_COLOR_FG_WHITE_DIMMED` | Dimmed white | +| `PRINT_COLOR_BG_RED` | Red background | + +### Custom Callback + +```cpp +print_to([](PRINT_COLOR color, const char *msg) { + my_gui_log(msg); +}, "Value: {}\n", value); +``` + +--- + +## Log + +**Header:** `core/log.h` + +Convenience wrappers over `print_to`. Each level has a prefix and color. + +```cpp +#include + +log_debug("texture loaded: {}", path); // [DEBUG]: ... 
(blue, DEBUG builds only) +log_info("server started on port {}", port); // [INFO]: ... (dimmed) +log_warning("missing config key: {}", key); // [WARNING]: ... (yellow, stderr) +log_error("failed to open file: {}", path); // [ERROR]: ... (red, stderr) +log_fatal("out of memory"); // [FATAL]: ... (red bg, stderr) — calls abort() +``` + +`log_debug` compiles to nothing in Release builds. diff --git a/docs/reflect.md b/docs/reflect.md new file mode 100644 index 00000000..2f07ebbf --- /dev/null +++ b/docs/reflect.md @@ -0,0 +1,87 @@ +# Reflect + +**Header:** `core/reflect.h` + +Compile-time type reflection without macros or external codegen. Supports primitives, pointers, arrays, structs, and enums. + +--- + +## Type Names + +```cpp +#include + +const char *name = name_of(); // "i32" +const char *name = name_of(); // "f32" +const char *name = name_of(); // "MyStruct" +const char *name = name_of(); // "MyStruct*" +``` + +--- + +## Type Kinds + +```cpp +Type_Info info = type_of(); + +switch (info.kind) +{ + case TYPE_KIND_STRUCT: ... + case TYPE_KIND_ENUM: ... + case TYPE_KIND_I32: ... + // etc. +} +``` + +Full list of `TYPE_KIND` values covers all primitives, pointers, C arrays, structs, and enums. + +--- + +## Struct Fields + +```cpp +struct Vertex { + float x, y, z; +}; + +Type_Info info = type_of(); +for (const Type_Field &field : info.fields) +{ + print_to(stdout, "field: {} offset: {}\n", field.name, field.offset); +} +``` + +Fields are discovered via structured bindings — the struct must be **aggregate-initializable**. + +--- + +## Enums + +Use the `REFLECT_ENUM` macro to register enum values: + +```cpp +enum Direction { NORTH, SOUTH, EAST, WEST }; +REFLECT_ENUM(Direction, NORTH, SOUTH, EAST, WEST); +``` + +Then iterate: + +```cpp +Type_Info info = type_of(); +for (const Type_Enum_Value &v : info.enum_values) + print_to(stdout, "{} = {}\n", v.name, v.value); +``` + +Supports negative values, non-contiguous values, and duplicate values. 
+ +--- + +## Limits + +| Constant | Default | +|---|---| +| `REFLECT_MAX_NAME_LENGTH` | 128 | +| `REFLECT_MIN_ENUM_VALUE` | -32 | +| `REFLECT_MAX_ENUM_VALUE` | 64 | + +These can be adjusted by modifying the header if your enums go outside this range. diff --git a/docs/result.md b/docs/result.md new file mode 100644 index 00000000..1ed2dac1 --- /dev/null +++ b/docs/result.md @@ -0,0 +1,65 @@ +# Result & Error + +**Header:** `core/result.h` + +`Error` is a simple value type carrying an optional formatted message. Functions that can fail return `Error` instead of throwing exceptions. + +--- + +## Error + +```cpp +#include + +// Return success +return Error{}; + +// Return failure +return Error{"failed to open file '{}'", path}; +``` + +Check at call site: + +```cpp +if (Error err = load_file("data.bin")) +{ + log_error("{}", err.message.data); + return err; +} +``` + +`Error` converts to `bool`: `true` means failure (message is non-empty). + +--- + +## Result\ + +For functions that return a value **or** an error: + +```cpp +Result load_texture(const char *path); + +auto [texture, err] = load_texture("albedo.png"); +if (err) +{ + log_error("texture load failed: {}", err.message.data); + return err; +} +// use texture +``` + +--- + +## Pattern: propagate with early return + +```cpp +Error init_renderer() +{ + if (Error err = create_device()) return err; + if (Error err = create_swapchain()) return err; + if (Error err = load_shaders()) return err; + return Error{}; +} +``` + +This is the idiomatic way to chain fallible operations — no exception overhead, no hidden control flow. diff --git a/docs/serialization.md b/docs/serialization.md new file mode 100644 index 00000000..fb480fcb --- /dev/null +++ b/docs/serialization.md @@ -0,0 +1,109 @@ +# Serialization + +**Header:** `core/serialization/serializer.h` + +Two concrete serializers are provided: binary and JSON. Both share the same `serialize()` interface so you can swap them freely. 
+ +--- + +## Interface + +```cpp +template +Error serialize(S &serializer, const char *name, T &value); +``` + +- `S` is `Binary_Serializer` or `Json_Serializer`. +- Returns `Error{}` on success, a descriptive `Error` on failure. + +--- + +## Binary Serializer + +**Header:** `core/serialization/binary_serializer.h` + +```cpp +#include + +// Write +Binary_Serializer writer = binary_serializer_init_writer("save.bin"); +DEFER(binary_serializer_deinit(writer)); + +serialize(writer, "health", player.health); +serialize(writer, "position", player.position); +``` + +```cpp +// Read +Binary_Serializer reader = binary_serializer_init_reader("save.bin"); +DEFER(binary_serializer_deinit(reader)); + +serialize(reader, "health", player.health); +serialize(reader, "position", player.position); +``` + +--- + +## JSON Serializer + +**Header:** `core/serialization/json_serializer.h` + +```cpp +#include + +// Write +Json_Serializer writer = json_serializer_init_writer("config.json"); +DEFER(json_serializer_deinit(writer)); + +serialize(writer, "width", config.width); +serialize(writer, "height", config.height); +serialize(writer, "fullscreen", config.fullscreen); +``` + +```cpp +// Read +Json_Serializer reader = json_serializer_init_reader("config.json"); +DEFER(json_serializer_deinit(reader)); + +serialize(reader, "width", config.width); +serialize(reader, "height", config.height); +serialize(reader, "fullscreen", config.fullscreen); +``` + +--- + +## Serializing Structs + +Use `serialize` with an `initializer_list` of `Serialize_Pair`: + +```cpp +struct Config { int width; int height; bool fullscreen; }; + +template +Error serialize(S &s, const char *name, Config &cfg) +{ + return serialize(s, { + {"width", cfg.width}, + {"height", cfg.height}, + {"fullscreen", cfg.fullscreen}, + }); +} +``` + +--- + +## Custom Types + +Specialize `serialize` for your type: + +```cpp +template +Error serialize(S &s, const char *name, Vec3 &v) +{ + return serialize(s, { + {"x", v.x}, + {"y", v.y}, 
+ {"z", v.z}, + }); +} +``` diff --git a/docs/validate.md b/docs/validate.md new file mode 100644 index 00000000..6144114e --- /dev/null +++ b/docs/validate.md @@ -0,0 +1,32 @@ +# Validate + +**Header:** `core/validate.h` + +`validate` is a runtime assertion that includes the source file, line, and function name automatically. + +--- + +## Usage + +```cpp +#include + +validate(index < count); +validate(ptr != nullptr, "pointer must not be null"); +validate(size > 0, "size must be positive"); +``` + +On failure it prints the condition, the message, and the source location, then aborts. In Debug builds this fires immediately. In Release builds the behaviour is configurable (currently also aborts). + +--- + +## vs. `assert` + +| | `assert` | `validate` | +|---|---|---| +| Source location | Condition string + line | File, line, function | +| Custom message | No | Yes | +| Release behavior | Strips out | Configurable | +| Used by containers | No | Yes (bounds checks) | + +All container bounds checks (`array[i]`, `span[i]`, etc.) use `validate` internally. 
diff --git a/test/src/test.cpp b/test/src/test.cpp index c134061a..80017827 100644 --- a/test/src/test.cpp +++ b/test/src/test.cpp @@ -18,52 +18,52 @@ // { // case TYPE_KIND_I8: // { -// printf("%" PRIi8, *(i8 *)v.data); +// printf("%" PRIi8, *(I8 *)v.data); // break; // } // case TYPE_KIND_I16: // { -// printf("%" PRIi16, *(i16 *)v.data); +// printf("%" PRIi16, *(I16 *)v.data); // break; // } // case TYPE_KIND_I32: // { -// printf("%" PRIi32, *(i32 *)v.data); +// printf("%" PRIi32, *(I32 *)v.data); // break; // } // case TYPE_KIND_I64: // { -// printf("%" PRIi64, *(i64 *)v.data); +// printf("%" PRIi64, *(I64 *)v.data); // break; // } // case TYPE_KIND_U8: // { -// printf("%" PRIu8, *(u8 *)v.data); +// printf("%" PRIu8, *(U8 *)v.data); // break; // } // case TYPE_KIND_U16: // { -// printf("%" PRIu16, *(u16 *)v.data); +// printf("%" PRIu16, *(U16 *)v.data); // break; // } // case TYPE_KIND_U32: // { -// printf("%" PRIu32, *(u32 *)v.data); +// printf("%" PRIu32, *(U32 *)v.data); // break; // } // case TYPE_KIND_U64: // { -// printf("%" PRIu64, *(u64 *)v.data); +// printf("%" PRIu64, *(U64 *)v.data); // break; // } // case TYPE_KIND_F32: // { -// printf("%g", *(f32 *)v.data); +// printf("%g", *(F32 *)v.data); // break; // } // case TYPE_KIND_F64: // { -// printf("%g", *(f64 *)v.data); +// printf("%g", *(F64 *)v.data); // break; // } // case TYPE_KIND_BOOL: @@ -79,7 +79,7 @@ // case TYPE_KIND_STRUCT: // { // printf("%s { ", v.type->name); -// for (u64 i = 0; i < v.type->as_struct.field_count; ++i) +// for (U64 i = 0; i < v.type->as_struct.field_count; ++i) // { // if (i != 0) // printf(", "); @@ -93,7 +93,7 @@ // case TYPE_KIND_ARRAY: // { // printf("[ "); -// for (u64 i = 0; i < v.type->as_array.element_count; ++i) +// for (U64 i = 0; i < v.type->as_array.element_count; ++i) // { // if (i != 0) // printf(", "); @@ -106,7 +106,7 @@ // case TYPE_KIND_POINTER: // { // const auto *pointee = v.type->as_pointer.pointee; -// uptr *pointer = *(uptr **)(v.data); +// U64 
*pointer = *(U64 **)(v.data); // if (v.type == type_of() || v.type == type_of()) // { // printf("\"%s\"", (const char *)pointer); @@ -120,8 +120,8 @@ // } // case TYPE_KIND_ENUM: // { -// for (u64 i = 0; i < v.type->as_enum.value_count; ++i) -// if (const auto & value = v.type->as_enum.values[i]; value.index == *(i32 *)(v.data)) +// for (U64 i = 0; i < v.type->as_enum.value_count; ++i) +// if (const auto & value = v.type->as_enum.values[i]; value.index == *(I32 *)(v.data)) // printf("%s(%" PRId32 ")", value.name, value.index); // break; // } @@ -133,10 +133,10 @@ // } // inline static void -// to_json(Value v, i32 indent = 0) +// to_json(Value v, I32 indent = 0) // { -// constexpr auto print_tab = [](u64 count) { -// for (u64 i = 0; i < count * 4; ++i) +// constexpr auto print_tab = [](U64 count) { +// for (U64 i = 0; i < count * 4; ++i) // printf(" "); // }; @@ -144,52 +144,52 @@ // { // case TYPE_KIND_I8: // { -// printf("%" PRIi8, *(i8 *)v.data); +// printf("%" PRIi8, *(I8 *)v.data); // break; // } // case TYPE_KIND_I16: // { -// printf("%" PRIi16, *(i16 *)v.data); +// printf("%" PRIi16, *(I16 *)v.data); // break; // } // case TYPE_KIND_I32: // { -// printf("%" PRIi32, *(i32 *)v.data); +// printf("%" PRIi32, *(I32 *)v.data); // break; // } // case TYPE_KIND_I64: // { -// printf("%" PRIi64, *(i64 *)v.data); +// printf("%" PRIi64, *(I64 *)v.data); // break; // } // case TYPE_KIND_U8: // { -// printf("%" PRIu8, *(u8 *)v.data); +// printf("%" PRIu8, *(U8 *)v.data); // break; // } // case TYPE_KIND_U16: // { -// printf("%" PRIu16, *(u16 *)v.data); +// printf("%" PRIu16, *(U16 *)v.data); // break; // } // case TYPE_KIND_U32: // { -// printf("%" PRIu32, *(u32 *)v.data); +// printf("%" PRIu32, *(U32 *)v.data); // break; // } // case TYPE_KIND_U64: // { -// printf("%" PRIu64, *(u64 *)v.data); +// printf("%" PRIu64, *(U64 *)v.data); // break; // } // case TYPE_KIND_F32: // { -// printf("%g", *(f32 *)v.data); +// printf("%g", *(F32 *)v.data); // break; // } // case 
TYPE_KIND_F64: // { -// printf("%g", *(f64 *)v.data); +// printf("%g", *(F64 *)v.data); // break; // } // case TYPE_KIND_BOOL: @@ -205,7 +205,7 @@ // case TYPE_KIND_STRUCT: // { // printf("{\n"); -// for (u64 i = 0; i < v.type->as_struct.field_count; ++i) +// for (U64 i = 0; i < v.type->as_struct.field_count; ++i) // { // const auto *field = &v.type->as_struct.fields[i]; // if (string_literal(field->tag) != "NoSerialize") @@ -225,7 +225,7 @@ // case TYPE_KIND_ARRAY: // { // printf("["); -// for (u64 i = 0; i < v.type->as_array.element_count; ++i) +// for (U64 i = 0; i < v.type->as_array.element_count; ++i) // { // if (i != 0) // printf(", "); @@ -238,7 +238,7 @@ // case TYPE_KIND_POINTER: // { // const auto *pointee = v.type->as_pointer.pointee; -// uptr *pointer = *(uptr **)(v.data); +// U64 *pointer = *(U64 **)(v.data); // if (v.type == type_of() || v.type == type_of()) // { // printf("\"%s\"", (const char *)pointer); @@ -255,8 +255,8 @@ // } // case TYPE_KIND_ENUM: // { -// for (u64 i = 0; i < v.type->as_enum.value_count; ++i) -// if (const auto & value = v.type->as_enum.values[i]; value.index == *(i32 *)(v.data)) +// for (U64 i = 0; i < v.type->as_enum.value_count; ++i) +// if (const auto & value = v.type->as_enum.values[i]; value.index == *(I32 *)(v.data)) // printf("\"%s\"", value.name); // break; // } @@ -307,8 +307,8 @@ // char a; // bool b; // const char *c[2]; -// i32 *d; -// Bar e; +// I32 *d; +// Bar e; // Foo *f; // }; @@ -327,10 +327,10 @@ // struct P // { // P() {} -// P(f32 _x, f32 _y, f32 _z, f32 _w) : x(_x), y(_y), z(_z), w(_w) {} +// P(F32 _x, F32 _y, F32 _z, F32 _w) : x(_x), y(_y), z(_z), w(_w) {} // private: -// f32 x, y, z, w; +// F32 x, y, z, w; // TYPE_OF_MEMBER(P) // }; @@ -339,7 +339,7 @@ struct vec3 { - f32 x, y, z; + F32 x, y, z; }; inline static String @@ -348,18 +348,18 @@ format(Formatter &self, const vec3 &data) return format(self, "{{{}, {}, {}}}", data.x, data.y, data.z);; } -i32 -main(i32, char **) +I32 +main(I32, char **) { - // 
i32 d = 7; - // i32 dd = 13; + // I32 d = 7; + // I32 dd = 13; // Foo f1 = {'A', true, {"Hello", "World"}, &d, {1.5f}, nullptr}; // Foo f2 = {'B', false, {"FOO", "BOO"}, &dd, {7.5f}, &f1}; // auto t2 = type_of(f2); // unused(t2); // to_json(value_of(f2)); - // to_json(value_of(TWO{1, 3.4f})); + // to_json(value_of(TWO{1, 3.4f})); // P p(1.5f, 2.5f, 3.5f, 4.5f); // to_json(value_of(p)); @@ -372,12 +372,12 @@ main(i32, char **) auto b5 = format("{}", 1.5f); auto b6 = format("{}", 2.5); - i32 array[] = {1, 2, 3}; + I32 array[] = {1, 2, 3}; auto b7 = format("{}", array); auto b8 = format("{}", "Hello, World!"); - i32 i = 5; + I32 i = 5; auto b9 = format("{}", &i); auto b10 = format("{}", 'A'); @@ -389,13 +389,13 @@ main(i32, char **) vec3 v = {1, 2, 3}; auto b13 = format("{}", v); - Array a = array_init_from({1, 2, 3}, memory::temp_allocator()); + Array a = array_init_from({1, 2, 3}, memory::temp_allocator()); auto b14 = format("{}", a); String b = string_from("BB", memory::temp_allocator()); auto b15 = format("{}", b); - Hash_Table c = hash_table_init_from({{1, 2}, {3, 4}}, memory::temp_allocator()); + Hash_Table c = hash_table_init_from({{1, 2}, {3, 4}}, memory::temp_allocator()); auto b16 = format("{}", c); unused(b1, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15, b16); diff --git a/unittest/CMakeLists.txt b/unittest/CMakeLists.txt index ef12b26d..c693153a 100644 --- a/unittest/CMakeLists.txt +++ b/unittest/CMakeLists.txt @@ -6,6 +6,7 @@ set(SOURCE_FILES src/unittest_core.cpp src/unittest_formatter.cpp src/unittest_containers.cpp + src/unittest_math.cpp src/unittest_platform.cpp src/unittest_reflect.cpp src/unittest_serializer.cpp @@ -15,10 +16,14 @@ set(LIBS core ) -add_executable(unittest ${HEADER_FILES} ${SOURCE_FILES}) -target_link_libraries(unittest PRIVATE ${LIBS}) -set_target_properties(unittest +add_executable(core_unittest ${HEADER_FILES} ${SOURCE_FILES}) +target_link_libraries(core_unittest PRIVATE ${LIBS}) +set_target_properties(core_unittest 
PROPERTIES UNITY_BUILD_BATCH_SIZE 8 UNITY_BUILD ${CORE_BUILD_UNITY} -) \ No newline at end of file +) + +# Math micro-benchmark — not a test, run manually to compare SIMD vs scalar paths. +add_executable(bench_math src/bench_math.cpp) +target_link_libraries(bench_math PRIVATE ${LIBS}) \ No newline at end of file diff --git a/unittest/src/bench_math.cpp b/unittest/src/bench_math.cpp new file mode 100644 index 00000000..02551138 --- /dev/null +++ b/unittest/src/bench_math.cpp @@ -0,0 +1,535 @@ +// bench_math.cpp — realistic-workload benchmarks for Core's math SIMD vs scalar paths. +// +// Goal: a *defensible* decision on whether to keep per-op SIMD code. We measure +// workloads that mirror real engine patterns (vertex transform, bone palette, +// frustum culling, quaternion animation) rather than isolated ops, and we force +// memory traffic so the compiler can't dead-code-eliminate our loops. +// +// Both SIMD and scalar implementations are compiled into the same binary so +// comparison is apples-to-apples regardless of the CMake config. +// +// Run: +// cmake -DCMAKE_BUILD_TYPE=Release && ninja bench_math && ./bin/Release/bench_math +// +// Output: per-workload ns/element and a verdict ("SIMD +23%" or "SCALAR +8%") +// that tells you whether the NEON path is worth keeping for that op. + +#include +#include +#include +#include +#include +#include + +#include +#include + +#if defined(SIMD_NEON) +#include +#endif +#if defined(SIMD_AVX) +#include +#endif + +using Clock = std::chrono::steady_clock; + +// ---- escape barrier -------------------------------------------------------- +// Force the compiler to treat the pointed-to memory as clobbered, preventing +// dead-code elimination and loop-invariant motion of the inner ops. +#if defined(_MSC_VER) +#include +// noinline sink: passing the pointer through a non-inlinable call forces the +// compiler to keep all stores to that memory alive before the call site. 
+static __declspec(noinline) void +_do_not_optimize_sink(const void *, U64) {} + +static inline void +memory_barrier() +{ + _ReadWriteBarrier(); +} + +template +static inline void +do_not_optimize(const T *ptr, U64 count) +{ + _do_not_optimize_sink((const void *)ptr, count); + _ReadWriteBarrier(); +} +#else +static inline void +memory_barrier() +{ + asm volatile("" : : : "memory"); +} + +template +static inline void +do_not_optimize(const T *ptr, U64 count) +{ + asm volatile("" : : "r"(ptr), "r"(count) : "memory"); +} +#endif + +// ---- timing helpers -------------------------------------------------------- + +struct Timing +{ + F64 ns_median; + F64 ns_min; + U64 iterations; +}; + +template +static Timing +time_runs(Fn &&fn, U64 iterations, int samples = 9) +{ + // Warm up once to prime caches / JIT-ish frequency scaling. + fn(); + memory_barrier(); + + F64 times[32]; + if (samples > 32) samples = 32; + for (int s = 0; s < samples; ++s) + { + auto t0 = Clock::now(); + fn(); + memory_barrier(); + auto t1 = Clock::now(); + times[s] = (F64)std::chrono::duration_cast(t1 - t0).count(); + } + + // Sort, take median and min. + for (int i = 1; i < samples; ++i) + { + F64 x = times[i]; + int j = i - 1; + while (j >= 0 && times[j] > x) { times[j + 1] = times[j]; --j; } + times[j + 1] = x; + } + + return Timing{times[samples / 2], times[0], iterations}; +} + +static void +print_verdict(const char *name, U64 count, Timing simd, Timing scalar) +{ + F64 simd_ns_per_el = simd.ns_median / (F64)count; + F64 scalar_ns_per_el = scalar.ns_median / (F64)count; + + // Speedup of scalar over SIMD. >1 means SIMD is faster. 
+ F64 speedup = scalar_ns_per_el / simd_ns_per_el; + const char *verdict; + char tag[32]; + if (speedup >= 1.10) { verdict = "SIMD"; snprintf(tag, sizeof(tag), "+%.0f%%", (speedup - 1.0) * 100.0); } + else if (speedup <= 0.91) { verdict = "SCALAR"; snprintf(tag, sizeof(tag), "+%.0f%%", (1.0 / speedup - 1.0) * 100.0); } + else { verdict = "TIE"; snprintf(tag, sizeof(tag), "%.2fx", speedup); } + + ::printf("%-36s %8.3f ns/el (simd) %8.3f ns/el (scalar) %-6s %s\n", + name, simd_ns_per_el, scalar_ns_per_el, verdict, tag); +} + +// ============================================================================ +// Scalar reference implementations. These match what Core would compute if +// we stripped the SIMD backend. They're here so we can A/B them against the +// SIMD versions inside the same binary. +// ============================================================================ + +struct Scalar_F32x4 { F32 x, y, z, w; }; + +static inline F32 +scalar_f32x4_dot(const Scalar_F32x4 &a, const Scalar_F32x4 &b) +{ + return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; +} + +// Transform a point (vec4 * matrix, row-vector convention). Row-major 4x4. 
+static inline Scalar_F32x4 +scalar_vec_mat(const Scalar_F32x4 &v, const F32 M[16]) +{ + return Scalar_F32x4{ + v.x * M[0] + v.y * M[4] + v.z * M[8] + v.w * M[12], + v.x * M[1] + v.y * M[5] + v.z * M[9] + v.w * M[13], + v.x * M[2] + v.y * M[6] + v.z * M[10] + v.w * M[14], + v.x * M[3] + v.y * M[7] + v.z * M[11] + v.w * M[15] + }; +} + +static inline void +scalar_mat_mat(const F32 A[16], const F32 B[16], F32 R[16]) +{ + for (int i = 0; i < 4; ++i) + { + const F32 a0 = A[i*4+0], a1 = A[i*4+1], a2 = A[i*4+2], a3 = A[i*4+3]; + R[i*4+0] = a0 * B[0] + a1 * B[4] + a2 * B[8] + a3 * B[12]; + R[i*4+1] = a0 * B[1] + a1 * B[5] + a2 * B[9] + a3 * B[13]; + R[i*4+2] = a0 * B[2] + a1 * B[6] + a2 * B[10] + a3 * B[14]; + R[i*4+3] = a0 * B[3] + a1 * B[7] + a2 * B[11] + a3 * B[15]; + } +} + +// ============================================================================ +// SIMD implementations via arch intrinsics (mirrors Core's f32x4.h / f32x4x4.h). +// ============================================================================ + +#if defined(SIMD_NEON) + +static inline F32 +simd_f32x4_dot(float32x4_t a, float32x4_t b) +{ + return vaddvq_f32(vmulq_f32(a, b)); +} + +static inline float32x4_t +simd_vec_mat(float32x4_t v, const float32x4_t M[4]) +{ + float32x4_t r = vmulq_laneq_f32(M[0], v, 0); + r = vfmaq_laneq_f32(r, M[1], v, 1); + r = vfmaq_laneq_f32(r, M[2], v, 2); + r = vfmaq_laneq_f32(r, M[3], v, 3); + return r; +} + +static inline void +simd_mat_mat(const float32x4_t A[4], const float32x4_t B[4], float32x4_t R[4]) +{ + for (int i = 0; i < 4; ++i) + { + float32x4_t ai = A[i]; + float32x4_t r = vmulq_laneq_f32(B[0], ai, 0); + r = vfmaq_laneq_f32(r, B[1], ai, 1); + r = vfmaq_laneq_f32(r, B[2], ai, 2); + r = vfmaq_laneq_f32(r, B[3], ai, 3); + R[i] = r; + } +} + +#elif defined(SIMD_AVX) + +static inline F32 +simd_f32x4_dot(__m128 a, __m128 b) +{ + return _mm_cvtss_f32(_mm_dp_ps(a, b, 0xFF)); +} + +static inline __m128 +simd_vec_mat(__m128 v, const __m128 M[4]) +{ + __m128 x = 
_mm_shuffle_ps(v, v, _MM_SHUFFLE(0,0,0,0)); + __m128 y = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1,1,1,1)); + __m128 z = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2,2,2,2)); + __m128 w = _mm_shuffle_ps(v, v, _MM_SHUFFLE(3,3,3,3)); + return _mm_add_ps(_mm_add_ps(_mm_mul_ps(x, M[0]), _mm_mul_ps(y, M[1])), + _mm_add_ps(_mm_mul_ps(z, M[2]), _mm_mul_ps(w, M[3]))); +} + +static inline void +simd_mat_mat(const __m128 A[4], const __m128 B[4], __m128 R[4]) +{ + for (int i = 0; i < 4; ++i) + { + __m128 ai = A[i]; + __m128 x = _mm_shuffle_ps(ai, ai, _MM_SHUFFLE(0,0,0,0)); + __m128 y = _mm_shuffle_ps(ai, ai, _MM_SHUFFLE(1,1,1,1)); + __m128 z = _mm_shuffle_ps(ai, ai, _MM_SHUFFLE(2,2,2,2)); + __m128 w = _mm_shuffle_ps(ai, ai, _MM_SHUFFLE(3,3,3,3)); + R[i] = _mm_add_ps(_mm_add_ps(_mm_mul_ps(x, B[0]), _mm_mul_ps(y, B[1])), + _mm_add_ps(_mm_mul_ps(z, B[2]), _mm_mul_ps(w, B[3]))); + } +} + +#endif + +// ============================================================================ +// Workloads. Each takes a pre-allocated input and writes to a pre-allocated +// output. The pointers are `volatile`-adjacent (via do_not_optimize) so the +// compiler can't constant-fold across calls. +// ============================================================================ + +// Workload 1: Transform N vectors by a matrix (vertex-shader emulation). 
+static void +workload_transform_scalar(const Scalar_F32x4 *in, const F32 *M, Scalar_F32x4 *out, U64 count) +{ + for (U64 i = 0; i < count; ++i) + out[i] = scalar_vec_mat(in[i], M); + do_not_optimize(out, count); +} + +#if defined(SIMD_NEON) +static void +workload_transform_simd(const float32x4_t *in, const float32x4_t *M, float32x4_t *out, U64 count) +{ + for (U64 i = 0; i < count; ++i) + out[i] = simd_vec_mat(in[i], M); + do_not_optimize(out, count); +} +#elif defined(SIMD_AVX) +static void +workload_transform_simd(const __m128 *in, const __m128 *M, __m128 *out, U64 count) +{ + for (U64 i = 0; i < count; ++i) + out[i] = simd_vec_mat(in[i], M); + do_not_optimize(out, count); +} +#endif + +// Workload 2: Chain N matrix-matrix multiplies (bone palette / skeleton composition). +static void +workload_mat_chain_scalar(const F32 *mats_flat, F32 *out_flat, U64 count) +{ + F32 acc[16] = { + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + }; + F32 tmp[16]; + for (U64 i = 0; i < count; ++i) + { + scalar_mat_mat(acc, mats_flat + i * 16, tmp); + for (int k = 0; k < 16; ++k) acc[k] = tmp[k]; + } + for (int k = 0; k < 16; ++k) out_flat[k] = acc[k]; + do_not_optimize(out_flat, 16); +} + +#if defined(SIMD_NEON) +static void +workload_mat_chain_simd(const float32x4_t *mats, float32x4_t *out, U64 count) +{ + float32x4_t acc[4] = { + {1.0f, 0.0f, 0.0f, 0.0f}, + {0.0f, 1.0f, 0.0f, 0.0f}, + {0.0f, 0.0f, 1.0f, 0.0f}, + {0.0f, 0.0f, 0.0f, 1.0f} + }; + float32x4_t tmp[4]; + for (U64 i = 0; i < count; ++i) + { + simd_mat_mat(acc, mats + i * 4, tmp); + acc[0] = tmp[0]; acc[1] = tmp[1]; acc[2] = tmp[2]; acc[3] = tmp[3]; + } + out[0] = acc[0]; out[1] = acc[1]; out[2] = acc[2]; out[3] = acc[3]; + do_not_optimize(out, 4); +} +#elif defined(SIMD_AVX) +static void +workload_mat_chain_simd(const __m128 *mats, __m128 *out, U64 count) +{ + __m128 acc[4] = { + _mm_setr_ps(1, 0, 0, 0), + _mm_setr_ps(0, 1, 0, 0), + _mm_setr_ps(0, 0, 1, 0), + 
_mm_setr_ps(0, 0, 0, 1) + }; + __m128 tmp[4]; + for (U64 i = 0; i < count; ++i) + { + simd_mat_mat(acc, mats + i * 4, tmp); + acc[0] = tmp[0]; acc[1] = tmp[1]; acc[2] = tmp[2]; acc[3] = tmp[3]; + } + out[0] = acc[0]; out[1] = acc[1]; out[2] = acc[2]; out[3] = acc[3]; + do_not_optimize(out, 4); +} +#endif + +// Workload 3: Dot products of N pairs (frustum culling-ish — each plane × each point). +static void +workload_dots_scalar(const Scalar_F32x4 *a, const Scalar_F32x4 *b, F32 *out, U64 count) +{ + for (U64 i = 0; i < count; ++i) + out[i] = scalar_f32x4_dot(a[i], b[i]); + do_not_optimize(out, count); +} + +#if defined(SIMD_NEON) +static void +workload_dots_simd(const float32x4_t *a, const float32x4_t *b, F32 *out, U64 count) +{ + for (U64 i = 0; i < count; ++i) + out[i] = simd_f32x4_dot(a[i], b[i]); + do_not_optimize(out, count); +} +#elif defined(SIMD_AVX) +static void +workload_dots_simd(const __m128 *a, const __m128 *b, F32 *out, U64 count) +{ + for (U64 i = 0; i < count; ++i) + out[i] = simd_f32x4_dot(a[i], b[i]); + do_not_optimize(out, count); +} +#endif + +// Workload 4: Quaternion slerp (animation sampling). Uses Core's `quaternion_slerp` +// directly since it's already scalar math; we compare the whole Core op vs a +// hand-unrolled SIMD version below. +static void +workload_slerp_scalar(const Quaternion *a, const Quaternion *b, const F32 *t, Quaternion *out, U64 count) +{ + for (U64 i = 0; i < count; ++i) + out[i] = quaternion_slerp(a[i], b[i], t[i]); + do_not_optimize(out, count); +} + +// ============================================================================ +// main — run each workload at several sizes. Sizes chosen to hit L1, L2, and +// LLC/main-memory so cache effects are visible. 
+// ============================================================================ + +int +main() +{ +#if defined(SIMD_NEON) + ::printf("=== Big bench (SIMD: NEON ARM64, Release) ===\n"); +#elif defined(SIMD_AVX) + ::printf("=== Big bench (SIMD: AVX x86_64, Release) ===\n"); +#else + ::printf("=== Big bench (no SIMD \u2014 rebuild with SIMD on) ===\n"); + return 0; +#endif + + ::printf("%-36s %20s %20s %-6s %s\n", "Workload (size)", "SIMD", "SCALAR", "WIN", "margin"); + ::printf("-------------------------------------------------------------------------------------------------\n"); + + const U64 sizes[] = { + 1024, // fits in L1 (16 KiB for F32x4) + 64 * 1024, // L2-ish + 1024 * 1024, // LLC / main memory + }; + + for (U64 N : sizes) + { + auto *in_a = memory::allocate(N); + auto *in_b = memory::allocate(N); + auto *out_v = memory::allocate(N); + auto *out_f = memory::allocate(N); + + for (U64 i = 0; i < N; ++i) + { + in_a[i] = Scalar_F32x4{(F32)i * 0.01f, (F32)i * 0.02f, 1.0f, 1.0f}; + in_b[i] = Scalar_F32x4{0.5f, (F32)i * 0.005f, -0.25f, 1.0f}; + } + + F32 M[16] = { + 1.1f, 0.2f, 0.3f, 0.0f, + 0.4f, 1.5f, 0.6f, 0.0f, + 0.7f, 0.8f, 1.9f, 0.0f, + 0.1f, 0.2f, 0.3f, 1.0f + }; + + // Pick a reasonable iteration count per size. Bigger size -> fewer iters. + U64 iters = N <= 1024 ? 50000 : (N <= 65536 ? 
2000 : 100); + + // Transform + { + auto simd_t = time_runs([&](){ + for (U64 i = 0; i < iters; ++i) +#if defined(SIMD_NEON) + workload_transform_simd((const float32x4_t *)in_a, (const float32x4_t *)M, + (float32x4_t *)out_v, N); +#elif defined(SIMD_AVX) + workload_transform_simd((const __m128 *)in_a, (const __m128 *)M, + (__m128 *)out_v, N); +#endif + }, iters * N); + auto scalar_t = time_runs([&](){ + for (U64 i = 0; i < iters; ++i) + workload_transform_scalar(in_a, M, out_v, N); + }, iters * N); + + char label[64]; snprintf(label, sizeof(label), "transform vec*mat (N=%llu)", (unsigned long long)N); + print_verdict(label, iters * N, simd_t, scalar_t); + } + + // Dot products (frustum-culling-shaped) + { + auto simd_t = time_runs([&](){ + for (U64 i = 0; i < iters; ++i) +#if defined(SIMD_NEON) + workload_dots_simd((const float32x4_t *)in_a, (const float32x4_t *)in_b, out_f, N); +#elif defined(SIMD_AVX) + workload_dots_simd((const __m128 *)in_a, (const __m128 *)in_b, out_f, N); +#endif + }, iters * N); + auto scalar_t = time_runs([&](){ + for (U64 i = 0; i < iters; ++i) + workload_dots_scalar(in_a, in_b, out_f, N); + }, iters * N); + + char label[64]; snprintf(label, sizeof(label), "f32x4_dot x N (N=%llu)", (unsigned long long)N); + print_verdict(label, iters * N, simd_t, scalar_t); + } + + memory::deallocate(in_a); + memory::deallocate(in_b); + memory::deallocate(out_v); + memory::deallocate(out_f); + } + + // Matrix chain workload — fixed size (bone palette = 128 typical). 
+ { + const U64 N = 128; + U64 iters = 200000; + + F32 *mats_flat = memory::allocate(N * 16); + for (U64 i = 0; i < N * 16; ++i) mats_flat[i] = (F32)(i % 7) * 0.13f + 1.0f; + + F32 out_flat[16]; + + auto simd_t = time_runs([&](){ + for (U64 i = 0; i < iters; ++i) +#if defined(SIMD_NEON) + workload_mat_chain_simd((const float32x4_t *)mats_flat, (float32x4_t *)out_flat, N); +#elif defined(SIMD_AVX) + workload_mat_chain_simd((const __m128 *)mats_flat, (__m128 *)out_flat, N); +#endif + }, iters * N); + + auto scalar_t = time_runs([&](){ + for (U64 i = 0; i < iters; ++i) + workload_mat_chain_scalar(mats_flat, out_flat, N); + }, iters * N); + + char label[64]; snprintf(label, sizeof(label), "mat*mat chain (N=%llu)", (unsigned long long)N); + print_verdict(label, iters * N, simd_t, scalar_t); + + memory::deallocate(mats_flat); + } + + // Quaternion slerp workload (animation sampling, 1024 bones × many frames). + { + const U64 N = 2048; + U64 iters = 500; + + Quaternion *a = memory::allocate(N); + Quaternion *b = memory::allocate(N); + F32 *t = memory::allocate(N); + Quaternion *o = memory::allocate(N); + + Random rng = random_from_seed(12345); + for (U64 i = 0; i < N; ++i) + { + a[i] = quaternion_random(rng); + b[i] = quaternion_random(rng); + t[i] = f32_random_unit(rng); + } + + auto scalar_t = time_runs([&](){ + for (U64 i = 0; i < iters; ++i) + workload_slerp_scalar(a, b, t, o, N); + }, iters * N); + + // No distinct SIMD version for slerp in this bench (Quaternion is scalar + // in Core). Report scalar-only so we have a reference number. 
+ ::printf("%-36s %43s %8.3f ns/el (no separate SIMD path)\n", + "quaternion_slerp (N=2048)", "-", scalar_t.ns_median / (F64)(iters * N)); + + memory::deallocate(a); memory::deallocate(b); memory::deallocate(t); memory::deallocate(o); + } + + ::printf("\nNotes:\n"); + ::printf(" * WIN column is per-op speedup of the winner; TIE within 10%%.\n"); + ::printf(" * L1/L2/LLC sized workloads isolate ALU bottlenecks vs memory-bandwidth bottlenecks.\n"); + ::printf(" * A consistent SCALAR win across all sizes = strip the SIMD path.\n"); + return 0; +} diff --git a/unittest/src/unittest.cpp b/unittest/src/unittest.cpp index 8f290f42..ecb5b3f0 100644 --- a/unittest/src/unittest.cpp +++ b/unittest/src/unittest.cpp @@ -1,7 +1,7 @@ #include -i32 -main(i32, char **) +I32 +main(I32, char **) { if (!tester_run(tester())) return EXIT_FAILURE; diff --git a/unittest/src/unittest_containers.cpp b/unittest/src/unittest_containers.cpp index 48ebc0cc..1740e357 100644 --- a/unittest/src/unittest_containers.cpp +++ b/unittest/src/unittest_containers.cpp @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include #include #include @@ -12,7 +14,7 @@ TESTER_TEST("[CONTAINERS]: Array") // ("init") { { - auto array = array_init(); + auto array = array_init(); DEFER(array_deinit(array)); TESTER_CHECK(array.data == nullptr); @@ -22,7 +24,7 @@ TESTER_TEST("[CONTAINERS]: Array") } { - auto array = array_init_with_capacity(100); + auto array = array_init_with_capacity(100); DEFER(array_deinit(array)); TESTER_CHECK(array.data != nullptr); @@ -32,7 +34,7 @@ TESTER_TEST("[CONTAINERS]: Array") } { - auto array = array_init_with_count(100); + auto array = array_init_with_count(100); DEFER(array_deinit(array)); TESTER_CHECK(array.data != nullptr); @@ -67,23 +69,23 @@ TESTER_TEST("[CONTAINERS]: Array") TESTER_CHECK(array2.count == array1.count); TESTER_CHECK(array2.capacity == array1.capacity); - for (u64 i = 0; i < array2.count; ++i) + for (U64 i = 0; i < array2.count; ++i) 
TESTER_CHECK(array2[i] == array1[i]); } // ("fill") { - auto array = array_init_with_count(50); + auto array = array_init_with_count(50); DEFER(array_deinit(array)); array_fill(array, 5.0f); - for (u64 i = 0; i < array.count; ++i) + for (U64 i = 0; i < array.count; ++i) TESTER_CHECK(array[i] == 5.0f); } // ("reserve") { - auto array = array_init_with_capacity(10); + auto array = array_init_with_capacity(10); DEFER(array_deinit(array)); TESTER_CHECK(array.data != nullptr); @@ -95,19 +97,19 @@ TESTER_TEST("[CONTAINERS]: Array") // ("push/pop/last") { - auto array = array_init(); + auto array = array_init(); DEFER(array_deinit(array)); - for (u64 i = 0; i < 100; ++i) + for (U64 i = 0; i < 100; ++i) array_push(array, i); TESTER_CHECK(array.count == 100); - for (u64 i = 0; i < array.count; ++i) + for (U64 i = 0; i < array.count; ++i) TESTER_CHECK(array[i] == i); - TESTER_CHECK(array_last(array) == 99); + TESTER_CHECK(array_back(array) == 99); - for (u64 i = 0; i < 100; ++i) + for (U64 i = 0; i < 100; ++i) TESTER_CHECK(array_pop(array) == 99 - i); TESTER_CHECK(array.count == 0); @@ -115,16 +117,16 @@ TESTER_TEST("[CONTAINERS]: Array") // ("remove") { - auto array = array_init(); + auto array = array_init(); DEFER(array_deinit(array)); - for (u64 i = 0; i < 100; ++i) + for (U64 i = 0; i < 100; ++i) array_push(array, i); TESTER_CHECK(array.count == 100); array_remove(array, array.count - 1); TESTER_CHECK(array.count == 99); - for (u64 i = 0; i < 99; ++i) + for (U64 i = 0; i < 99; ++i) TESTER_CHECK(array[i] == i); array_remove(array, 0); @@ -132,12 +134,12 @@ TESTER_TEST("[CONTAINERS]: Array") TESTER_CHECK(array.count == 98); array_remove_ordered(array, 0); - for (u64 i = 0; i < 97; ++i) + for (U64 i = 0; i < 97; ++i) TESTER_CHECK(array[i] == i + 1); TESTER_CHECK(array.count == 97); array_remove(array, 0); - array_remove_if(array, [](u64 element) { + array_remove_if(array, [](U64 element) { return element % 2 == 0; }); TESTER_CHECK(array.count == 48); @@ -145,19 +147,19 @@ 
TESTER_TEST("[CONTAINERS]: Array") // ("remove_ordered") { - auto array = array_init(); + auto array = array_init(); DEFER(array_deinit(array)); - for (u64 i = 0; i < 100; ++i) - array_push(array, i32(i)); + for (U64 i = 0; i < 100; ++i) + array_push(array, I32(i)); TESTER_CHECK(array.count == 100); - array_remove_ordered_if(array, [](i32 element) { + array_remove_ordered_if(array, [](I32 element) { return element % 2 == 0; }); - i32 j = 1; - for (u64 i = 0; i < 50; ++i) + I32 j = 1; + for (U64 i = 0; i < 50; ++i) { TESTER_CHECK(array[i] == j); j += 2; @@ -166,7 +168,7 @@ TESTER_TEST("[CONTAINERS]: Array") array_remove_ordered(array, 0); j = 3; - for (u64 i = 0; i < 49; ++i) + for (U64 i = 0; i < 49; ++i) { TESTER_CHECK(array[i] == j); j += 2; @@ -185,19 +187,19 @@ TESTER_TEST("[CONTAINERS]: Array") array_append(array1, array2); TESTER_CHECK(array1.count == 10); - for (i32 i = 0; i < (i32)array1.count; ++i) + for (I32 i = 0; i < (I32)array1.count; ++i) TESTER_CHECK(array1[i] == i); } // ("iterators") { - auto array = array_init_from({0, 1, 2, 3, 4}); + auto array = array_init_from({0, 1, 2, 3, 4}); DEFER(array_deinit(array)); TESTER_CHECK(begin(array) == array.data); TESTER_CHECK(end(array) == array.data + array.count); - i32 i = 0; + I32 i = 0; for (auto v : array) TESTER_CHECK(v == i++); @@ -215,17 +217,17 @@ TESTER_TEST("[CONTAINERS]: Array") // ("clone") { - auto array1 = array_init>(); + auto array1 = array_init>(); DEFER(destroy(array1)); - array_push(array1, array_init_from({1, 2, 3})); + array_push(array1, array_init_from({1, 2, 3})); auto array2 = clone(array1); DEFER(destroy(array2)); - for (u64 i = 0; i < array2.count; ++i) + for (U64 i = 0; i < array2.count; ++i) { TESTER_CHECK(array2.count == array1.count); - for (u64 j = 0; j < array2[i].count; ++j) + for (U64 j = 0; j < array2[i].count; ++j) { auto v1 = array1[i][j]; auto v2 = array2[i][j]; @@ -236,11 +238,11 @@ TESTER_TEST("[CONTAINERS]: Array") // ("destroy") { - auto v = array_init>(); + auto v = 
array_init>(); DEFER(destroy(v)); - array_push(v, array_init_from({1, 2, 3})); - array_push(v, array_init_from({4, 5, 6})); + array_push(v, array_init_from({1, 2, 3})); + array_push(v, array_init_from({4, 5, 6})); } } @@ -249,20 +251,20 @@ TESTER_TEST("[CONTAINERS]: Stack_Array") // ("init") { { - Stack_Array array{}; + Stack_Array array{}; TESTER_CHECK(array.count == 0); } { Stack_Array array{{1, 2, 3}}; - for (u64 i = 0; i < array.count; ++i) - TESTER_CHECK(array[i] == i32(i + 1)); + for (U64 i = 0; i < array.count; ++i) + TESTER_CHECK(array[i] == I32(i + 1)); TESTER_CHECK(array.count == 3); } { - Stack_Array array{{1, 2, 3}}; - for (u64 i = 0; i < array.count; ++i) + Stack_Array array{{1, 2, 3}}; + for (U64 i = 0; i < array.count; ++i) TESTER_CHECK(array[i] == i + 1); TESTER_CHECK(array.count == 3); } @@ -283,7 +285,7 @@ TESTER_TEST("[CONTAINERS]: String") s = string_from(c_string); TESTER_CHECK(s.count == 13); TESTER_CHECK(s.capacity == 14); - for (u64 i = 0; i < s.count; ++i) + for (U64 i = 0; i < s.count; ++i) TESTER_CHECK(s[i] == c_string[i]); TESTER_CHECK(s.data[s.count] == '\0'); string_deinit(s); @@ -292,7 +294,7 @@ TESTER_TEST("[CONTAINERS]: String") TESTER_CHECK(s.allocator == nullptr); TESTER_CHECK(s.count == 13); TESTER_CHECK(s.capacity == 14); - for (u64 i = 0; i < s.count; ++i) + for (U64 i = 0; i < s.count; ++i) TESTER_CHECK(s[i] == c_string[i]); TESTER_CHECK(s.data[s.count] == '\0'); string_deinit(s); @@ -315,7 +317,7 @@ TESTER_TEST("[CONTAINERS]: String") s = string_from(literal, literal + 13); TESTER_CHECK(s.count == 13); TESTER_CHECK(s.capacity == 14); - for (u64 i = 0; i < 13; ++i) + for (U64 i = 0; i < 13; ++i) TESTER_CHECK(s[i] == literal[i]); string_deinit(s); } @@ -330,7 +332,7 @@ TESTER_TEST("[CONTAINERS]: String") TESTER_CHECK(s2.count == s1.count); TESTER_CHECK(s2.capacity == s1.capacity); - for (u64 i = 0; i < s2.count; ++i) + for (U64 i = 0; i < s2.count; ++i) TESTER_CHECK(s2[i] == s1[i]); } @@ -349,7 +351,7 @@ 
TESTER_TEST("[CONTAINERS]: String") TESTER_CHECK(s.capacity == 14); auto expected = "Hello, World!"; - for (u64 i = 0; i < s.count; ++i) + for (U64 i = 0; i < s.count; ++i) TESTER_CHECK(s[i] == expected[i]); TESTER_CHECK(s.data[s.count] == '\0'); @@ -400,7 +402,7 @@ TESTER_TEST("[CONTAINERS]: String") auto expected = "Hello, World!"; TESTER_CHECK(s1.count == 13); TESTER_CHECK(s1.capacity == 14); - for (u64 i = 0; i < s1.count; ++i) + for (U64 i = 0; i < s1.count; ++i) TESTER_CHECK(s1[i] == expected[i]); TESTER_CHECK(s1.data[s1.count] == '\0'); @@ -408,7 +410,7 @@ TESTER_TEST("[CONTAINERS]: String") string_trim_left(s1, string_literal("H")); TESTER_CHECK(s1.count == 12); TESTER_CHECK(s1.capacity == 13); - for (u64 i = 1; i < s1.count; ++i) + for (U64 i = 1; i < s1.count; ++i) TESTER_CHECK(s1[i - 1] == expected[i]); TESTER_CHECK(s1.data[s1.count] == '\0'); @@ -416,7 +418,7 @@ TESTER_TEST("[CONTAINERS]: String") string_trim_right(s1, string_literal("!")); TESTER_CHECK(s1.count == 11); TESTER_CHECK(s1.capacity == 12); - for (u64 i = 1; i < s1.count; ++i) + for (U64 i = 1; i < s1.count; ++i) TESTER_CHECK(s1[i - 1] == expected[i]); TESTER_CHECK(s1.data[s1.count] == '\0'); @@ -428,7 +430,7 @@ TESTER_TEST("[CONTAINERS]: String") TESTER_CHECK(s2.count == 13); TESTER_CHECK(s2.capacity == 14); - for (u64 i = 0; i < s2.count; ++i) + for (U64 i = 0; i < s2.count; ++i) TESTER_CHECK(s2[i] == expected[i]); TESTER_CHECK(s2.data[s2.count] == '\0'); } @@ -501,7 +503,7 @@ TESTER_TEST("[CONTAINERS]: String") auto expected = "Hello, World!"; TESTER_CHECK(s.count == 13); - for (u64 i = 0; i < s.count; ++i) + for (U64 i = 0; i < s.count; ++i) TESTER_CHECK(s[i] == expected[i]); TESTER_CHECK(s.data[s.count] == '\0'); @@ -586,7 +588,7 @@ TESTER_TEST("[CONTAINERS]: String") struct Foo { - i32 x; + I32 x; inline bool operator==(const Foo &other) const @@ -601,7 +603,7 @@ struct Foo } }; -inline static u64 +inline static U64 hash(const Foo &value) { return hash_fnv_x32(&value.x, 
sizeof(value.x)); @@ -633,7 +635,7 @@ TESTER_TEST("[CONTAINERS]: Hash_Table") } { - auto table = hash_table_init_with_capacity(62, memory::temp_allocator()); + auto table = hash_table_init_with_capacity(62, memory::temp_allocator()); TESTER_CHECK(table.count == 0); TESTER_CHECK(table.capacity == 64); @@ -649,7 +651,7 @@ TESTER_TEST("[CONTAINERS]: Hash_Table") } { - auto table = hash_table_init_from({ {1, "Hello"}, {2, "World!"} }, memory::temp_allocator()); + auto table = hash_table_init_from({ {1, "Hello"}, {2, "World!"} }, memory::temp_allocator()); TESTER_CHECK(table.count == 2); TESTER_CHECK(table.capacity == 8); @@ -666,7 +668,7 @@ TESTER_TEST("[CONTAINERS]: Hash_Table") } { - Hash_Table table = {}; + Hash_Table table = {}; DEFER(hash_table_deinit(table)); hash_table_insert(table, 1, 1); @@ -689,7 +691,7 @@ TESTER_TEST("[CONTAINERS]: Hash_Table") // ("operator[]") { - Hash_Table table = {}; + Hash_Table table = {}; DEFER(hash_table_deinit(table)); hash_table_insert(table, 1, 1); @@ -709,13 +711,13 @@ TESTER_TEST("[CONTAINERS]: Hash_Table") TESTER_CHECK(table[1] == 3); - i32 x = table[1]; + I32 x = table[1]; TESTER_CHECK(x == 3); - const i32 &xx = table[1]; + const I32 &xx = table[1]; TESTER_CHECK(xx == 3); - i32 &y = table[1]; + I32 &y = table[1]; y = 1; TESTER_CHECK(table.entries[0].key == 1); @@ -786,10 +788,10 @@ TESTER_TEST("[CONTAINERS]: Hash_Table") // ("i32, Foo") { - Hash_Table table = hash_table_init(); + Hash_Table table = hash_table_init(); DEFER(hash_table_deinit(table)); - i32 key = 1; + I32 key = 1; Foo value = Foo{1}; hash_table_insert(table, key, value); @@ -807,7 +809,7 @@ TESTER_TEST("[CONTAINERS]: Hash_Table") TESTER_CHECK(hash_table_find(table, key)->value != Foo{1}); TESTER_CHECK(hash_table_find(table, key)->value == Foo{3}); - i32 key2 = 2; + I32 key2 = 2; Foo value2 = Foo{2}; hash_table_insert(table, key2, value2); TESTER_CHECK(table.count == 2); @@ -826,7 +828,7 @@ TESTER_TEST("[CONTAINERS]: Hash_Table") // ("reserve") { - Hash_Table 
table = hash_table_init(memory::temp_allocator()); + Hash_Table table = hash_table_init(memory::temp_allocator()); hash_table_reserve(table, 100); @@ -846,7 +848,7 @@ TESTER_TEST("[CONTAINERS]: Hash_Table") TESTER_CHECK(table.slots.count == 128); TESTER_CHECK(table.slots.capacity == 128); - for (i32 i = 0; i < 50; ++i) + for (I32 i = 0; i < 50; ++i) hash_table_insert(table, i, i); hash_table_reserve(table, 100); @@ -863,7 +865,7 @@ TESTER_TEST("[CONTAINERS]: Hash_Table") { // ("unordered") { - Hash_Table table = {}; + Hash_Table table = {}; DEFER(hash_table_deinit(table)); TESTER_CHECK(hash_table_remove(table, 0) == false); @@ -873,7 +875,7 @@ TESTER_TEST("[CONTAINERS]: Hash_Table") // ("ordered") { - Hash_Table table = {}; + Hash_Table table = {}; DEFER(hash_table_deinit(table)); hash_table_insert(table, 1, 1); hash_table_insert(table, 2, 2); @@ -918,19 +920,19 @@ TESTER_TEST("[CONTAINERS]: Hash_Table") // ("resize") { - Hash_Table table = hash_table_init(memory::temp_allocator()); + Hash_Table table = hash_table_init(memory::temp_allocator()); - for (i32 i = 0; i < 100; ++i) + for (I32 i = 0; i < 100; ++i) hash_table_insert(table, i, i + 0.5f); TESTER_CHECK(table.count == 100); TESTER_CHECK(table.capacity == 256); - i32 j = 0; + I32 j = 0; for (const auto &entry : table) TESTER_CHECK(entry.key == j++); - for (i32 i = 0; i < 100; ++i) + for (I32 i = 0; i < 100; ++i) { auto pair = hash_table_find(table, i); TESTER_CHECK(pair != nullptr); @@ -938,14 +940,14 @@ TESTER_TEST("[CONTAINERS]: Hash_Table") TESTER_CHECK(pair->value == (i + 0.5f)); } - for (i32 i = 0; i < 100; ++i) + for (I32 i = 0; i < 100; ++i) { TESTER_CHECK(hash_table_remove(table, i) == true); TESTER_CHECK(hash_table_find(table, i) == nullptr); - TESTER_CHECK(table.count == u64(99 - i)); + TESTER_CHECK(table.count == U64(99 - i)); } - for (i32 i = 0; i < 100; ++i) + for (I32 i = 0; i < 100; ++i) { TESTER_CHECK(hash_table_remove(table, i) == false); TESTER_CHECK(hash_table_find(table, i) == nullptr); @@ 
-957,15 +959,15 @@ TESTER_TEST("[CONTAINERS]: Hash_Table") // ("clear") { - Hash_Table table = hash_table_init(memory::temp_allocator()); + Hash_Table table = hash_table_init(memory::temp_allocator()); - for (i32 i = 0; i < 10; ++i) + for (I32 i = 0; i < 10; ++i) hash_table_insert(table, i, i + 1); TESTER_CHECK(table.count == 10); TESTER_CHECK(table.capacity == 16); - i32 j = 0; + I32 j = 0; for (auto &entry : table) { TESTER_CHECK(entry.key == j++); @@ -977,19 +979,19 @@ TESTER_TEST("[CONTAINERS]: Hash_Table") TESTER_CHECK(table.count == 0); TESTER_CHECK(table.capacity == 16); - for (i32 i = 0; i < 10; ++i) + for (I32 i = 0; i < 10; ++i) TESTER_CHECK(hash_table_find(table, i) == nullptr); } // ("copy/clone/destroy") { - Hash_Table table1 = hash_table_init(); + Hash_Table table1 = hash_table_init(); DEFER(destroy(table1)); TESTER_CHECK(table1.count == 0); TESTER_CHECK(table1.capacity == 0); - for (i32 i = 0; i < 10; ++i) + for (I32 i = 0; i < 10; ++i) hash_table_insert(table1, i, i + 1); TESTER_CHECK(table1.count == 10); @@ -1001,23 +1003,23 @@ TESTER_TEST("[CONTAINERS]: Hash_Table") TESTER_CHECK(table2.count == 10); TESTER_CHECK(table2.capacity == 16); - i32 j = 0; + I32 j = 0; for (auto &entry : table2) { TESTER_CHECK(entry.key == j++); TESTER_CHECK(entry.value == j); } - auto table3 = hash_table_init(); + auto table3 = hash_table_init(); DEFER(destroy(table3)); - for (i32 i = 0; i < 10; ++i) + for (I32 i = 0; i < 10; ++i) hash_table_insert(table3, Foo{i}, i + 10); TESTER_CHECK(table3.count == 10); TESTER_CHECK(table3.capacity == 16); - i32 k = 0; + I32 k = 0; for (const auto &entry : table3) { TESTER_CHECK(entry.key == Foo{k}); @@ -1042,18 +1044,18 @@ TESTER_TEST("[CONTAINERS]: Hash_Table") // ("user defined key [Foo]") { - Hash_Table table = hash_table_init(memory::temp_allocator()); + Hash_Table table = hash_table_init(memory::temp_allocator()); TESTER_CHECK(table.count == 0); TESTER_CHECK(table.capacity == 0); - for (i32 i = 0; i < 10; ++i) + for (I32 i = 0; 
i < 10; ++i) hash_table_insert(table, Foo{i}, i + 1); TESTER_CHECK(table.count == 10); TESTER_CHECK(table.capacity == 16); - i32 j = 0; + I32 j = 0; for (const auto &entry : table) { TESTER_CHECK(entry.key == Foo{j++}); @@ -1079,7 +1081,7 @@ TESTER_TEST("[CONTAINERS]: Hash_Set") } { - auto set = hash_set_init_with_capacity(62, memory::temp_allocator()); + auto set = hash_set_init_with_capacity(62, memory::temp_allocator()); TESTER_CHECK(set.count == 0); TESTER_CHECK(set.capacity == 64); @@ -1095,7 +1097,7 @@ TESTER_TEST("[CONTAINERS]: Hash_Set") } { - auto set = hash_set_init_from({1, 2}, memory::temp_allocator()); + auto set = hash_set_init_from({1, 2}, memory::temp_allocator()); TESTER_CHECK(set.count == 2); TESTER_CHECK(set.capacity == 8); @@ -1109,7 +1111,7 @@ TESTER_TEST("[CONTAINERS]: Hash_Set") } { - Hash_Set set = {}; + Hash_Set set = {}; DEFER(hash_set_deinit(set)); hash_set_insert(set, 1); @@ -1181,10 +1183,10 @@ TESTER_TEST("[CONTAINERS]: Hash_Set") // ("i32, Foo") { - Hash_Set set = hash_set_init(); + Hash_Set set = hash_set_init(); DEFER(hash_set_deinit(set)); - i32 key = 1; + I32 key = 1; hash_set_insert(set, key); TESTER_CHECK(set.count == 1); @@ -1198,7 +1200,7 @@ TESTER_TEST("[CONTAINERS]: Hash_Set") TESTER_CHECK(hash_set_find(set, key) != nullptr); TESTER_CHECK(*hash_set_find(set, key) == 1); - i32 key2 = 2; + I32 key2 = 2; hash_set_insert(set, key2); TESTER_CHECK(set.count == 2); @@ -1216,7 +1218,7 @@ TESTER_TEST("[CONTAINERS]: Hash_Set") // ("reserve") { - Hash_Set set = hash_set_init(memory::temp_allocator()); + Hash_Set set = hash_set_init(memory::temp_allocator()); hash_set_reserve(set, 100); @@ -1236,7 +1238,7 @@ TESTER_TEST("[CONTAINERS]: Hash_Set") TESTER_CHECK(set.slots.count == 128); TESTER_CHECK(set.slots.capacity == 128); - for (i32 i = 0; i < 50; ++i) + for (I32 i = 0; i < 50; ++i) hash_set_insert(set, i); hash_set_reserve(set, 100); @@ -1253,7 +1255,7 @@ TESTER_TEST("[CONTAINERS]: Hash_Set") { // ("unordered") { - Hash_Set set = {}; 
+ Hash_Set set = {}; DEFER(hash_set_deinit(set)); TESTER_CHECK(hash_set_remove(set, 0) == false); @@ -1263,7 +1265,7 @@ TESTER_TEST("[CONTAINERS]: Hash_Set") // ("ordered") { - Hash_Set set = {}; + Hash_Set set = {}; DEFER(hash_set_deinit(set)); hash_set_insert(set, 1); hash_set_insert(set, 2); @@ -1300,33 +1302,33 @@ TESTER_TEST("[CONTAINERS]: Hash_Set") // ("resize") { - Hash_Set set = hash_set_init(memory::temp_allocator()); + Hash_Set set = hash_set_init(memory::temp_allocator()); - for (i32 i = 0; i < 100; ++i) + for (I32 i = 0; i < 100; ++i) hash_set_insert(set, i); TESTER_CHECK(set.count == 100); TESTER_CHECK(set.capacity == 256); - i32 j = 0; + I32 j = 0; for (const auto &entry : set) TESTER_CHECK(entry == j++); - for (i32 i = 0; i < 100; ++i) + for (I32 i = 0; i < 100; ++i) { auto entry = hash_set_find(set, i); TESTER_CHECK(entry != nullptr); TESTER_CHECK(*entry == i); } - for (i32 i = 0; i < 100; ++i) + for (I32 i = 0; i < 100; ++i) { TESTER_CHECK(hash_set_remove(set, i) == true); TESTER_CHECK(hash_set_find(set, i) == nullptr); - TESTER_CHECK(set.count == u64(99 - i)); + TESTER_CHECK(set.count == U64(99 - i)); } - for (i32 i = 0; i < 100; ++i) + for (I32 i = 0; i < 100; ++i) { TESTER_CHECK(hash_set_remove(set, i) == false); TESTER_CHECK(hash_set_find(set, i) == nullptr); @@ -1338,15 +1340,15 @@ TESTER_TEST("[CONTAINERS]: Hash_Set") // ("clear") { - Hash_Set set = hash_set_init(memory::temp_allocator()); + Hash_Set set = hash_set_init(memory::temp_allocator()); - for (i32 i = 0; i < 10; ++i) + for (I32 i = 0; i < 10; ++i) hash_set_insert(set, i); TESTER_CHECK(set.count == 10); TESTER_CHECK(set.capacity == 16); - i32 j = 0; + I32 j = 0; for (const auto &entry : set) TESTER_CHECK(entry == j++); @@ -1355,19 +1357,19 @@ TESTER_TEST("[CONTAINERS]: Hash_Set") TESTER_CHECK(set.count == 0); TESTER_CHECK(set.capacity == 16); - for (i32 i = 0; i < 10; ++i) + for (I32 i = 0; i < 10; ++i) TESTER_CHECK(hash_set_find(set, i) == nullptr); } // ("copy/clone/destroy") { - 
Hash_Set set1 = hash_set_init(); + Hash_Set set1 = hash_set_init(); DEFER(destroy(set1)); TESTER_CHECK(set1.count == 0); TESTER_CHECK(set1.capacity == 0); - for (i32 i = 0; i < 10; ++i) + for (I32 i = 0; i < 10; ++i) hash_set_insert(set1, i); TESTER_CHECK(set1.count == 10); @@ -1379,20 +1381,20 @@ TESTER_TEST("[CONTAINERS]: Hash_Set") TESTER_CHECK(set2.count == 10); TESTER_CHECK(set2.capacity == 16); - i32 j = 0; + I32 j = 0; for (const auto &entry : set2) TESTER_CHECK(entry == j++); auto set3 = hash_set_init(); DEFER(destroy(set3)); - for (i32 i = 0; i < 10; ++i) + for (I32 i = 0; i < 10; ++i) hash_set_insert(set3, Foo{i}); TESTER_CHECK(set3.count == 10); TESTER_CHECK(set3.capacity == 16); - i32 k = 0; + I32 k = 0; for (const auto &entry : set3) { TESTER_CHECK(entry == Foo{k}); @@ -1420,13 +1422,13 @@ TESTER_TEST("[CONTAINERS]: Hash_Set") TESTER_CHECK(table.count == 0); TESTER_CHECK(table.capacity == 0); - for (i32 i = 0; i < 10; ++i) + for (I32 i = 0; i < 10; ++i) hash_set_insert(table, Foo{i}); TESTER_CHECK(table.count == 10); TESTER_CHECK(table.capacity == 16); - i32 j = 0; + I32 j = 0; for (const auto &entry : table) TESTER_CHECK(entry == Foo{j++}); } @@ -1445,4 +1447,278 @@ TESTER_TEST("[CONTAINERS]: String Interner") const char *begin = test_string + 15; const char *end = begin + 6; TESTER_CHECK(s == string_interner_intern(interner, begin, end)); +} + +TESTER_TEST("[CONTAINERS]: Span") +{ + // ("span_init from pointer + count") + { + I32 values[] = {10, 20, 30}; + auto span = span_init(values, 3); + TESTER_CHECK(span.data == values); + TESTER_CHECK(span.count == 3); + } + + // ("span_init from two pointers") + { + I32 values[] = {1, 2, 3, 4, 5}; + auto span = span_init(values, values + 5); + TESTER_CHECK(span.data == values); + TESTER_CHECK(span.count == 5); + } + + // ("span_init from C array") + { + I32 values[4] = {7, 8, 9, 10}; + auto span = span_init(values); + TESTER_CHECK(span.data == values); + TESTER_CHECK(span.count == 4); + } + + // ("span_init 
from Array") + { + auto array = array_init_from({1, 2, 3}); + DEFER(array_deinit(array)); + + auto span = span_init(array); + TESTER_CHECK(span.data == array.data); + TESTER_CHECK(span.count == array.count); + } + + // ("span_init from Stack_Array") + { + Stack_Array array{{1, 2, 3}}; + auto span = span_init(array); + TESTER_CHECK(span.data == array.data); + TESTER_CHECK(span.count == 3); + } + + // ("span_init from c-string") + { + Span span = span_init("hello"); + TESTER_CHECK(span.count == 5); + TESTER_CHECK(span[0] == 'h'); + TESTER_CHECK(span[4] == 'o'); + } + + // ("span_init from initializer_list — used inline as function argument") + { + auto check = [](Span span) { + TESTER_CHECK(span.count == 4); + TESTER_CHECK(span[0] == 1); + TESTER_CHECK(span[3] == 4); + }; + check(span_init({1, 2, 3, 4})); + } + + // ("operator[]") + { + I32 values[] = {10, 20, 30}; + auto span = span_init(values, 3); + TESTER_CHECK(span[0] == 10); + TESTER_CHECK(span[1] == 20); + TESTER_CHECK(span[2] == 30); + } + + // ("mutation through span") + { + I32 values[] = {1, 2, 3}; + auto span = span_init(values, 3); + span[0] = 99; + TESTER_CHECK(values[0] == 99); + } + + // ("span_is_empty") + { + I32 values[] = {1}; + TESTER_CHECK(span_is_empty(span_init((I32 *)nullptr, (U64)0)) == true); + TESTER_CHECK(span_is_empty(span_init(values, 1)) == false); + } + + // ("span_first / span_last") + { + I32 values[] = {10, 20, 30}; + auto span = span_init(values, 3); + TESTER_CHECK(span_first(span) == 10); + TESTER_CHECK(span_last(span) == 30); + + span_first(span) = 99; + span_last(span) = 77; + TESTER_CHECK(values[0] == 99); + TESTER_CHECK(values[2] == 77); + } + + // ("range-based for") + { + I32 values[] = {1, 2, 3, 4, 5}; + auto span = span_init(values, 5); + I32 sum = 0; + for (I32 v : span) + sum += v; + TESTER_CHECK(sum == 15); + } +} + +TESTER_TEST("[CONTAINERS]: Ring_Buffer") +{ + // ("init") + { + auto rb = ring_buffer_init(); + DEFER(ring_buffer_deinit(rb)); + + TESTER_CHECK(rb.data == 
nullptr); + TESTER_CHECK(rb.count == 0); + TESTER_CHECK(rb.capacity == 0); + TESTER_CHECK(rb.head == 0); + TESTER_CHECK(rb.allocator != nullptr); + } + + // ("push_back / first / last") + { + auto rb = ring_buffer_init(); + DEFER(ring_buffer_deinit(rb)); + + ring_buffer_push_back(rb, 1); + ring_buffer_push_back(rb, 2); + ring_buffer_push_back(rb, 3); + + TESTER_CHECK(rb.count == 3); + TESTER_CHECK(ring_buffer_front(rb) == 1); + TESTER_CHECK(ring_buffer_back(rb) == 3); + TESTER_CHECK(rb[0] == 1); + TESTER_CHECK(rb[1] == 2); + TESTER_CHECK(rb[2] == 3); + } + + // ("push_front / first / last") + { + auto rb = ring_buffer_init(); + DEFER(ring_buffer_deinit(rb)); + + ring_buffer_push_front(rb, 3); + ring_buffer_push_front(rb, 2); + ring_buffer_push_front(rb, 1); + + TESTER_CHECK(rb.count == 3); + TESTER_CHECK(ring_buffer_front(rb) == 1); + TESTER_CHECK(ring_buffer_back(rb) == 3); + TESTER_CHECK(rb[0] == 1); + TESTER_CHECK(rb[1] == 2); + TESTER_CHECK(rb[2] == 3); + } + + // ("pop_front — FIFO behaviour") + { + auto rb = ring_buffer_init(); + DEFER(ring_buffer_deinit(rb)); + + ring_buffer_push_back(rb, 10); + ring_buffer_push_back(rb, 20); + ring_buffer_push_back(rb, 30); + + TESTER_CHECK(ring_buffer_front(rb) == 10); + ring_buffer_pop_front(rb); + TESTER_CHECK(rb.count == 2); + TESTER_CHECK(ring_buffer_front(rb) == 20); + ring_buffer_pop_front(rb); + TESTER_CHECK(ring_buffer_front(rb) == 30); + ring_buffer_pop_front(rb); + TESTER_CHECK(rb.count == 0); + } + + // ("pop_back — stack behaviour") + { + auto rb = ring_buffer_init(); + DEFER(ring_buffer_deinit(rb)); + + ring_buffer_push_back(rb, 10); + ring_buffer_push_back(rb, 20); + ring_buffer_push_back(rb, 30); + + TESTER_CHECK(ring_buffer_back(rb) == 30); + ring_buffer_pop_back(rb); + TESTER_CHECK(rb.count == 2); + TESTER_CHECK(ring_buffer_back(rb) == 20); + } + + // ("wrap-around: head advances past end, tail wraps") + { + auto rb = ring_buffer_init(); + DEFER(ring_buffer_deinit(rb)); + + // fill to 8 (initial growth) + 
for (I32 i = 0; i < 8; ++i) + ring_buffer_push_back(rb, i); + + // drain 4 from front → head=4 + for (I32 i = 0; i < 4; ++i) + ring_buffer_pop_front(rb); + + TESTER_CHECK(rb.count == 4); + TESTER_CHECK(rb.head == 4); + + // push 4 more → tail wraps past end + for (I32 i = 8; i < 12; ++i) + ring_buffer_push_back(rb, i); + + TESTER_CHECK(rb.count == 8); + // logical order must be 4,5,6,7,8,9,10,11 + for (U64 i = 0; i < rb.count; ++i) + TESTER_CHECK(rb[i] == I32(i + 4)); + } + + // ("reserve: linearizes wrapped buffer correctly") + { + auto rb = ring_buffer_init(); + DEFER(ring_buffer_deinit(rb)); + + for (I32 i = 0; i < 8; ++i) + ring_buffer_push_back(rb, i); + + // advance head to create a wrap condition on next pushes + for (I32 i = 0; i < 4; ++i) + ring_buffer_pop_front(rb); + for (I32 i = 8; i < 12; ++i) + ring_buffer_push_back(rb, i); // wraps tail past index 0 + + // now force a grow (buffer is full at 8) + ring_buffer_push_back(rb, 12); + + // after grow, head must be 0 and logical order preserved + TESTER_CHECK(rb.head == 0); + TESTER_CHECK(rb.count == 9); + for (U64 i = 0; i < rb.count; ++i) + TESTER_CHECK(rb[i] == I32(i + 4)); + } + + // ("is_empty / clear") + { + auto rb = ring_buffer_init(); + DEFER(ring_buffer_deinit(rb)); + + TESTER_CHECK(ring_buffer_is_empty(rb) == true); + ring_buffer_push_back(rb, 1); + TESTER_CHECK(ring_buffer_is_empty(rb) == false); + ring_buffer_clear(rb); + TESTER_CHECK(rb.count == 0); + TESTER_CHECK(rb.head == 0); + TESTER_CHECK(ring_buffer_is_empty(rb) == true); + } + + // ("copy") + { + auto rb1 = ring_buffer_init(); + DEFER(ring_buffer_deinit(rb1)); + + for (I32 i = 0; i < 5; ++i) + ring_buffer_push_back(rb1, i); + + auto rb2 = ring_buffer_copy(rb1); + DEFER(ring_buffer_deinit(rb2)); + + TESTER_CHECK(rb2.count == rb1.count); + TESTER_CHECK(rb2.head == 0); // copy is always linearized + for (U64 i = 0; i < rb2.count; ++i) + TESTER_CHECK(rb2[i] == rb1[i]); + } } \ No newline at end of file diff --git 
a/unittest/src/unittest_core.cpp b/unittest/src/unittest_core.cpp index 89848b4c..f7011364 100644 --- a/unittest/src/unittest_core.cpp +++ b/unittest/src/unittest_core.cpp @@ -13,8 +13,8 @@ TESTER_TEST("[CORE]: Arena_Allocator") memory::Arena_Allocator *arena = memory::arena_allocator_init(1024); DEFER(memory::arena_allocator_deinit(arena)); - void *a = memory::arena_allocator_allocate(arena, 4); - void *b = memory::arena_allocator_allocate(arena, 8); + void *a = memory::arena_allocator_allocate(arena, 4, 1); + void *b = memory::arena_allocator_allocate(arena, 8, 1); TESTER_CHECK(a != nullptr); TESTER_CHECK(b != nullptr); @@ -27,7 +27,7 @@ TESTER_TEST("[CORE]: Arena_Allocator") TESTER_CHECK(memory::arena_allocator_get_used_size(arena) == 0); TESTER_CHECK(memory::arena_allocator_get_peak_size(arena) == 12); - arena_allocator_allocate(arena, 2048); + arena_allocator_allocate(arena, 2048, 1); TESTER_CHECK(memory::arena_allocator_get_used_size(arena) == 2048); TESTER_CHECK(memory::arena_allocator_get_peak_size(arena) == 2048); @@ -42,7 +42,7 @@ TESTER_TEST("[CORE]: Pool_Allocator") { struct Entity { - f32 x, y, z; + F32 x, y, z; }; memory::Pool_Allocator *pool = memory::pool_allocator_init(sizeof(Entity), 10); @@ -67,7 +67,7 @@ TESTER_TEST("[CORE]: Pool_Allocator") TESTER_CHECK(p5 == e3); } -inline static Result +inline static Result _result_test_with_default_error_pseudo_disk_read(bool success) { if (success) @@ -77,7 +77,7 @@ _result_test_with_default_error_pseudo_disk_read(bool success) enum class PSEUDO_DISK_READ_RESULT_CODE { OK, NOT_OK }; -inline static Result +inline static Result _result_test_with_custom_error_pseudo_disk_read(bool success) { if (success) diff --git a/unittest/src/unittest_formatter.cpp b/unittest/src/unittest_formatter.cpp index 0e30fb04..0e1c8bab 100644 --- a/unittest/src/unittest_formatter.cpp +++ b/unittest/src/unittest_formatter.cpp @@ -3,7 +3,7 @@ struct vec3 { - f32 x, y, z; + F32 x, y, z; }; inline static String @@ -89,7 +89,7 @@ 
TESTER_TEST("[CORE]: Formatter") buffer = format("{}", "{{ \"name\": \"n\" }}", memory::temp_allocator()); TESTER_CHECK(buffer == "{{ \"name\": \"n\" }}"); - i32 x = 1; + I32 x = 1; buffer = format("{}", &x, memory::temp_allocator()); char test[] = "test"; @@ -107,10 +107,10 @@ TESTER_TEST("[CORE]: Formatter") buffer = format("{}", array_of_strings, memory::temp_allocator()); TESTER_CHECK(buffer == "[2] { Hello, World }"); - buffer = format("{}", array_init_from({1, 2, 3}, memory::temp_allocator()), memory::temp_allocator()); + buffer = format("{}", array_init_from({1, 2, 3}, memory::temp_allocator()), memory::temp_allocator()); TESTER_CHECK(buffer == "[3] { 1, 2, 3 }"); - buffer = format("{}", hash_table_init_from({{1, "1"}, {2, "2"}, {3, "3"}}, memory::temp_allocator()), memory::temp_allocator()); + buffer = format("{}", hash_table_init_from({{1, "1"}, {2, "2"}, {3, "3"}}, memory::temp_allocator()), memory::temp_allocator()); TESTER_CHECK(buffer == "[3] { 1: 1, 2: 2, 3: 3 }"); buffer = format("{}{}{}{}{}", 1, 2, 3, "{}", 4, memory::temp_allocator()); @@ -235,12 +235,12 @@ TESTER_TEST("[CORE]: Formatter") // ("hex with different integer types") { - TESTER_CHECK(format("{:x}", (u8)255, memory::temp_allocator()) == "0xff"); - TESTER_CHECK(format("{:x}", (u16)65535, memory::temp_allocator()) == "0xffff"); - TESTER_CHECK(format("{:x}", (u32)4294967295, memory::temp_allocator()) == "0xffffffff"); - TESTER_CHECK(format("{:x}", (u64)18446744073709551615ULL, memory::temp_allocator()) == "0xffffffffffffffff"); - TESTER_CHECK(format("{:x}", (u8)-1, memory::temp_allocator()) == "0xff"); - TESTER_CHECK(format("{:x}", (u16)-1, memory::temp_allocator()) == "0xffff"); + TESTER_CHECK(format("{:x}", (U8)255, memory::temp_allocator()) == "0xff"); + TESTER_CHECK(format("{:x}", (U16)65535, memory::temp_allocator()) == "0xffff"); + TESTER_CHECK(format("{:x}", (U32)4294967295, memory::temp_allocator()) == "0xffffffff"); + TESTER_CHECK(format("{:x}", (U64)18446744073709551615ULL, 
memory::temp_allocator()) == "0xffffffffffffffff"); + TESTER_CHECK(format("{:x}", (U8)-1, memory::temp_allocator()) == "0xff"); + TESTER_CHECK(format("{:x}", (U16)-1, memory::temp_allocator()) == "0xffff"); } } @@ -491,12 +491,12 @@ TESTER_TEST("[CORE]: Formatter") // ("hex with different integer types") { - TESTER_CHECK(format("{:x}", (u8)255, memory::temp_allocator()) == "0xff"); - TESTER_CHECK(format("{:x}", (u16)65535, memory::temp_allocator()) == "0xffff"); - TESTER_CHECK(format("{:x}", (u32)4294967295, memory::temp_allocator()) == "0xffffffff"); - TESTER_CHECK(format("{:x}", (u64)18446744073709551615ULL, memory::temp_allocator()) == "0xffffffffffffffff"); - TESTER_CHECK(format("{:x}", (u8)-1, memory::temp_allocator()) == "0xff"); - TESTER_CHECK(format("{:x}", (u16)-1, memory::temp_allocator()) == "0xffff"); + TESTER_CHECK(format("{:x}", (U8)255, memory::temp_allocator()) == "0xff"); + TESTER_CHECK(format("{:x}", (U16)65535, memory::temp_allocator()) == "0xffff"); + TESTER_CHECK(format("{:x}", (U32)4294967295, memory::temp_allocator()) == "0xffffffff"); + TESTER_CHECK(format("{:x}", (U64)18446744073709551615ULL, memory::temp_allocator()) == "0xffffffffffffffff"); + TESTER_CHECK(format("{:x}", (U8)-1, memory::temp_allocator()) == "0xff"); + TESTER_CHECK(format("{:x}", (U16)-1, memory::temp_allocator()) == "0xffff"); } } diff --git a/unittest/src/unittest_math.cpp b/unittest/src/unittest_math.cpp new file mode 100644 index 00000000..feb11d47 --- /dev/null +++ b/unittest/src/unittest_math.cpp @@ -0,0 +1,1031 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// ============================================================================ +// Scalar helpers — F32 +// ============================================================================ + 
+TESTER_TEST("[MATH][f32]: constants") +{ + TESTER_CHECK(f32_approx_equal(F32_PI * 2.0f, F32_TAU, F32_EPSILON)); + TESTER_CHECK(f32_approx_equal(F32_PI * 0.5f, F32_PI_OVER_2, F32_EPSILON)); + TESTER_CHECK(f32_approx_equal(F32_PI * F32_TO_DEGREES, 180.0f, 1e-4f)); + TESTER_CHECK(f32_approx_equal(180.0f * F32_TO_RADIANS, F32_PI, 1e-4f)); +} + +TESTER_TEST("[MATH][f32]: basic arithmetic") +{ + TESTER_CHECK(f32_abs(-3.5f) == 3.5f); + TESTER_CHECK(f32_abs( 3.5f) == 3.5f); + + TESTER_CHECK(f32_sign(-2.0f) == -1.0f); + TESTER_CHECK(f32_sign( 0.0f) == 0.0f); + TESTER_CHECK(f32_sign( 2.0f) == 1.0f); + + TESTER_CHECK(f32_min(2.0f, 3.0f) == 2.0f); + TESTER_CHECK(f32_max(2.0f, 3.0f) == 3.0f); + + TESTER_CHECK(f32_clamp(-5.0f, 0.0f, 10.0f) == 0.0f); + TESTER_CHECK(f32_clamp( 5.0f, 0.0f, 10.0f) == 5.0f); + TESTER_CHECK(f32_clamp(15.0f, 0.0f, 10.0f) == 10.0f); + + TESTER_CHECK(f32_lerp(0.0f, 10.0f, 0.25f) == 2.5f); + TESTER_CHECK(f32_lerp(0.0f, 10.0f, 0.0f) == 0.0f); + TESTER_CHECK(f32_lerp(0.0f, 10.0f, 1.0f) == 10.0f); +} + +TESTER_TEST("[MATH][f32]: trig + power") +{ + TESTER_CHECK(f32_approx_equal(f32_sqrt(4.0f), 2.0f, 1e-6f)); + TESTER_CHECK(f32_approx_equal(f32_sin(0.0f), 0.0f, 1e-6f)); + TESTER_CHECK(f32_approx_equal(f32_cos(0.0f), 1.0f, 1e-6f)); + TESTER_CHECK(f32_approx_equal(f32_sin(F32_PI_OVER_2), 1.0f, 1e-6f)); + TESTER_CHECK(f32_approx_equal(f32_cos(F32_PI_OVER_2), 0.0f, 1e-6f)); + TESTER_CHECK(f32_approx_equal(f32_power(2.0f, 10.0f), 1024.0f, 1e-3f)); + TESTER_CHECK(f32_approx_equal(f32_modulo(10.5f, 3.0f), 1.5f, 1e-6f)); + TESTER_CHECK(f32_approx_equal(f32_atan2(1.0f, 0.0f), F32_PI_OVER_2, 1e-6f)); +} + +TESTER_TEST("[MATH][f32]: special values") +{ + TESTER_CHECK(f32_is_nan(F32_NAN)); + TESTER_CHECK(!f32_is_nan(0.0f)); + TESTER_CHECK(!f32_is_nan(F32_INFINITY)); + + TESTER_CHECK(f32_is_infinite(F32_INFINITY)); + TESTER_CHECK(f32_is_infinite(F32_NEG_INFINITY)); + TESTER_CHECK(!f32_is_infinite(1.0f)); + TESTER_CHECK(!f32_is_infinite(F32_NAN)); + + 
TESTER_CHECK(f32_is_finite(0.0f)); + TESTER_CHECK(f32_is_finite(-1e30f)); + TESTER_CHECK(!f32_is_finite(F32_INFINITY)); + TESTER_CHECK(!f32_is_finite(F32_NAN)); +} + +TESTER_TEST("[MATH][f32]: smoothstep / smootherstep") +{ + TESTER_CHECK(f32_smoothstep(0.0f, 1.0f, -0.5f) == 0.0f); + TESTER_CHECK(f32_smoothstep(0.0f, 1.0f, 1.5f) == 1.0f); + TESTER_CHECK(f32_approx_equal(f32_smoothstep(0.0f, 1.0f, 0.5f), 0.5f, 1e-6f)); + + TESTER_CHECK(f32_smootherstep(0.0f, 1.0f, -0.5f) == 0.0f); + TESTER_CHECK(f32_smootherstep(0.0f, 1.0f, 1.5f) == 1.0f); + TESTER_CHECK(f32_approx_equal(f32_smootherstep(0.0f, 1.0f, 0.5f), 0.5f, 1e-6f)); +} + +TESTER_TEST("[MATH][f32]: easing endpoints") +{ + // Every easing function maps 0 -> 0 and 1 -> 1. + TESTER_CHECK(f32_ease_in_quad(0.0f) == 0.0f); + TESTER_CHECK(f32_ease_in_quad(1.0f) == 1.0f); + TESTER_CHECK(f32_ease_out_quad(0.0f) == 0.0f); + TESTER_CHECK(f32_ease_out_quad(1.0f) == 1.0f); + TESTER_CHECK(f32_ease_in_out_quad(0.0f) == 0.0f); + TESTER_CHECK(f32_ease_in_out_quad(1.0f) == 1.0f); + + TESTER_CHECK(f32_ease_in_cubic(0.0f) == 0.0f); + TESTER_CHECK(f32_ease_in_cubic(1.0f) == 1.0f); + TESTER_CHECK(f32_ease_out_cubic(0.0f) == 0.0f); + TESTER_CHECK(f32_ease_out_cubic(1.0f) == 1.0f); + TESTER_CHECK(f32_ease_in_out_cubic(0.0f) == 0.0f); + TESTER_CHECK(f32_ease_in_out_cubic(1.0f) == 1.0f); + + TESTER_CHECK(f32_ease_in_elastic(0.0f) == 0.0f); + TESTER_CHECK(f32_ease_in_elastic(1.0f) == 1.0f); + TESTER_CHECK(f32_ease_out_elastic(0.0f) == 0.0f); + TESTER_CHECK(f32_ease_out_elastic(1.0f) == 1.0f); + TESTER_CHECK(f32_ease_in_out_elastic(0.0f) == 0.0f); + TESTER_CHECK(f32_ease_in_out_elastic(1.0f) == 1.0f); +} + +TESTER_TEST("[MATH][f32]: smooth_damp converges") +{ + F32 current = 0.0f; + F32 velocity = 0.0f; + F32 target = 10.0f; + for (int i = 0; i < 300; ++i) + current = f32_smooth_damp(current, target, &velocity, 0.1f, 0.016f); + TESTER_CHECK(f32_approx_equal(current, target, 0.1f)); +} + +// 
============================================================================ +// Scalar helpers — F64 +// ============================================================================ + +TESTER_TEST("[MATH][f64]: constants + arithmetic") +{ + TESTER_CHECK(f64_approx_equal(F64_PI * 2.0, F64_TAU, F64_EPSILON)); + TESTER_CHECK(f64_abs(-3.5) == 3.5); + TESTER_CHECK(f64_sign(-2.0) == -1.0); + TESTER_CHECK(f64_min(2.0, 3.0) == 2.0); + TESTER_CHECK(f64_max(2.0, 3.0) == 3.0); + TESTER_CHECK(f64_clamp(15.0, 0.0, 10.0) == 10.0); + TESTER_CHECK(f64_lerp(0.0, 10.0, 0.5) == 5.0); + TESTER_CHECK(f64_approx_equal(f64_sqrt(9.0), 3.0, 1e-9)); + TESTER_CHECK(f64_approx_equal(f64_cos(0.0), 1.0, 1e-12)); +} + +TESTER_TEST("[MATH][f64]: special values") +{ + TESTER_CHECK(f64_is_nan(F64_NAN)); + TESTER_CHECK(f64_is_infinite(F64_INFINITY)); + TESTER_CHECK(f64_is_finite(0.0)); + TESTER_CHECK(!f64_is_finite(F64_NAN)); +} + +// ============================================================================ +// Scalar helpers — I32 / I64 / U32 / U64 +// ============================================================================ + +TESTER_TEST("[MATH][i32]: basic ops") +{ + TESTER_CHECK(i32_abs(-5) == 5); + TESTER_CHECK(i32_abs( 5) == 5); + TESTER_CHECK(i32_sign(-2) == -1); + TESTER_CHECK(i32_sign( 0) == 0); + TESTER_CHECK(i32_sign( 2) == 1); + TESTER_CHECK(i32_min(2, 3) == 2); + TESTER_CHECK(i32_max(2, 3) == 3); + TESTER_CHECK(i32_clamp(-5, 0, 10) == 0); + TESTER_CHECK(i32_clamp(15, 0, 10) == 10); +} + +TESTER_TEST("[MATH][i64]: basic ops") +{ + TESTER_CHECK(i64_abs(-5ll) == 5ll); + TESTER_CHECK(i64_sign(-2ll) == -1ll); + TESTER_CHECK(i64_min(2ll, 3ll) == 2ll); + TESTER_CHECK(i64_max(2ll, 3ll) == 3ll); + TESTER_CHECK(i64_clamp(15ll, 0ll, 10ll) == 10ll); +} + +TESTER_TEST("[MATH][u32]: basic ops") +{ + TESTER_CHECK(u32_min(2u, 3u) == 2u); + TESTER_CHECK(u32_max(2u, 3u) == 3u); + TESTER_CHECK(u32_clamp(5u, 0u, 10u) == 5u); + TESTER_CHECK(u32_clamp(15u, 0u, 10u) == 10u); +} + 
+TESTER_TEST("[MATH][u64]: basic ops") +{ + TESTER_CHECK(u64_min(2ull, 3ull) == 2ull); + TESTER_CHECK(u64_max(2ull, 3ull) == 3ull); + TESTER_CHECK(u64_clamp(15ull, 0ull, 10ull) == 10ull); +} + +// ============================================================================ +// Vectors — F32x2 +// ============================================================================ + +TESTER_TEST("[MATH][F32x2]: operators") +{ + F32x2 a = {3.0f, 4.0f}; + F32x2 b = {1.0f, 2.0f}; + + TESTER_CHECK(a + b == F32x2{4.0f, 6.0f}); + TESTER_CHECK(a - b == F32x2{2.0f, 2.0f}); + TESTER_CHECK(-a == F32x2{-3.0f, -4.0f}); + TESTER_CHECK(a * 2.0f == F32x2{6.0f, 8.0f}); + TESTER_CHECK(2.0f * a == F32x2{6.0f, 8.0f}); + TESTER_CHECK(a / 2.0f == F32x2{1.5f, 2.0f}); + + F32x2 c = a; c += b; + TESTER_CHECK(c == F32x2{4.0f, 6.0f}); +} + +TESTER_TEST("[MATH][F32x2]: dot / cross / length") +{ + F32x2 a = {3.0f, 4.0f}; + TESTER_CHECK(f32x2_length_squared(a) == 25.0f); + TESTER_CHECK(f32_approx_equal(f32x2_length(a), 5.0f, 1e-6f)); + TESTER_CHECK(f32_approx_equal(f32x2_length(f32x2_normalize(a)), 1.0f, 1e-6f)); + + F32x2 b = {1.0f, 2.0f}; + TESTER_CHECK(f32x2_dot(a, b) == 11.0f); + + // 2D cross (scalar z-component of 3D cross). 
+ TESTER_CHECK(f32x2_cross(F32x2{1.0f, 0.0f}, F32x2{0.0f, 1.0f}) == 1.0f); + TESTER_CHECK(f32x2_cross(F32x2{0.0f, 1.0f}, F32x2{1.0f, 0.0f}) == -1.0f); +} + +TESTER_TEST("[MATH][F32x2]: min/max/lerp/approx_equal") +{ + F32x2 a = {3.0f, -1.0f}; + F32x2 b = {1.0f, 5.0f}; + TESTER_CHECK(f32x2_min(a, b) == F32x2{1.0f, -1.0f}); + TESTER_CHECK(f32x2_max(a, b) == F32x2{3.0f, 5.0f}); + TESTER_CHECK(f32x2_lerp(F32x2{0.0f, 0.0f}, F32x2{10.0f, 20.0f}, 0.5f) == F32x2{5.0f, 10.0f}); + TESTER_CHECK(f32x2_approx_equal(F32x2{1.0000001f, 2.0f}, F32x2{1.0f, 2.0f}, 1e-5f)); +} + +// ============================================================================ +// Vectors — F32x3 (+ canonical axis constants) +// ============================================================================ + +TESTER_TEST("[MATH][F32x3]: operators + dot + cross") +{ + F32x3 a = {1.0f, 2.0f, 3.0f}; + F32x3 b = {4.0f, 5.0f, 6.0f}; + + TESTER_CHECK(a + b == F32x3{5.0f, 7.0f, 9.0f}); + TESTER_CHECK(a - b == F32x3{-3.0f, -3.0f, -3.0f}); + TESTER_CHECK(-a == F32x3{-1.0f, -2.0f, -3.0f}); + TESTER_CHECK(a * 2.0f == F32x3{2.0f, 4.0f, 6.0f}); + + TESTER_CHECK(f32x3_dot(a, b) == 32.0f); + TESTER_CHECK(f32x3_cross(F32X3_RIGHT, F32X3_UP) == F32X3_BACKWARD); + TESTER_CHECK(f32x3_cross(F32X3_UP, F32X3_RIGHT) == F32X3_FORWARD); +} + +TESTER_TEST("[MATH][F32x3]: length/normalize/min/max/lerp") +{ + F32x3 v = {2.0f, 3.0f, 6.0f}; + TESTER_CHECK(f32x3_length_squared(v) == 49.0f); + TESTER_CHECK(f32_approx_equal(f32x3_length(v), 7.0f, 1e-6f)); + TESTER_CHECK(f32_approx_equal(f32x3_length(f32x3_normalize(v)), 1.0f, 1e-6f)); + + F32x3 a = {3.0f, -1.0f, 10.0f}; + F32x3 b = {1.0f, 5.0f, -2.0f}; + TESTER_CHECK(f32x3_min(a, b) == F32x3{1.0f, -1.0f, -2.0f}); + TESTER_CHECK(f32x3_max(a, b) == F32x3{3.0f, 5.0f, 10.0f}); + TESTER_CHECK(f32x3_lerp(F32X3_ZERO, F32x3{10.0f, 20.0f, 30.0f}, 0.5f) == F32x3{5.0f, 10.0f, 15.0f}); + + F32x3 clamp_v = {-2.0f, 5.0f, 7.0f}; + F32x3 clamp_lo = { 0.0f, 0.0f, 0.0f}; + F32x3 clamp_hi = { 3.0f, 3.0f, 
6.0f}; + TESTER_CHECK(f32x3_clamp(clamp_v, clamp_lo, clamp_hi) == F32x3{0.0f, 3.0f, 6.0f}); +} + +TESTER_TEST("[MATH][F32x3]: canonical axis constants") +{ + TESTER_CHECK(F32X3_RIGHT == F32x3{ 1.0f, 0.0f, 0.0f}); + TESTER_CHECK(F32X3_UP == F32x3{ 0.0f, 1.0f, 0.0f}); + TESTER_CHECK(F32X3_FORWARD == F32x3{ 0.0f, 0.0f, -1.0f}); + TESTER_CHECK(F32X3_LEFT == -F32X3_RIGHT); + TESTER_CHECK(F32X3_DOWN == -F32X3_UP); + TESTER_CHECK(F32X3_BACKWARD == -F32X3_FORWARD); +} + +// ============================================================================ +// Vectors — F32x4 (SIMD-backed) +// ============================================================================ + +TESTER_TEST("[MATH][F32x4]: alignment + size") +{ + // alignas(16) and exactly 16 bytes — must match std140 / MSL vec4. + TESTER_CHECK(sizeof(F32x4) == 16); + TESTER_CHECK(alignof(F32x4) == 16); +} + +TESTER_TEST("[MATH][F32x4]: operators") +{ + F32x4 a = {1.0f, 2.0f, 3.0f, 4.0f}; + F32x4 b = {5.0f, 6.0f, 7.0f, 8.0f}; + + TESTER_CHECK(a + b == F32x4{6.0f, 8.0f, 10.0f, 12.0f}); + TESTER_CHECK(b - a == F32x4{4.0f, 4.0f, 4.0f, 4.0f}); + TESTER_CHECK(-a == F32x4{-1.0f, -2.0f, -3.0f, -4.0f}); + TESTER_CHECK(a * 2.0f == F32x4{2.0f, 4.0f, 6.0f, 8.0f}); + TESTER_CHECK(2.0f * a == F32x4{2.0f, 4.0f, 6.0f, 8.0f}); + TESTER_CHECK(a / 2.0f == F32x4{0.5f, 1.0f, 1.5f, 2.0f}); +} + +TESTER_TEST("[MATH][F32x4]: dot / length / normalize") +{ + F32x4 a = {1.0f, 2.0f, 3.0f, 4.0f}; + F32x4 b = {5.0f, 6.0f, 7.0f, 8.0f}; + + TESTER_CHECK(f32_approx_equal(f32x4_dot(a, b), 70.0f, 1e-5f)); + + F32x4 v = {2.0f, 2.0f, 2.0f, 2.0f}; // length = sqrt(16) = 4 + TESTER_CHECK(f32_approx_equal(f32x4_length_squared(v), 16.0f, 1e-5f)); + TESTER_CHECK(f32_approx_equal(f32x4_length(v), 4.0f, 1e-5f)); + TESTER_CHECK(f32_approx_equal(f32x4_length(f32x4_normalize(v)), 1.0f, 1e-5f)); +} + +TESTER_TEST("[MATH][F32x4]: min / max / lerp / approx_equal / from_f32") +{ + F32x4 a = {3.0f, -1.0f, 10.0f, 0.0f}; + F32x4 b = {1.0f, 5.0f, -2.0f, 8.0f}; + + 
TESTER_CHECK(f32x4_min(a, b) == F32x4{1.0f, -1.0f, -2.0f, 0.0f}); + TESTER_CHECK(f32x4_max(a, b) == F32x4{3.0f, 5.0f, 10.0f, 8.0f}); + + TESTER_CHECK(f32x4_lerp(F32X4_ZERO, F32X4_ONE, 0.25f) == F32x4{0.25f, 0.25f, 0.25f, 0.25f}); + TESTER_CHECK(f32x4_approx_equal(F32x4{1.0f, 2.0f, 3.0f, 4.0f}, F32x4{1.0f, 2.0f, 3.0f, 4.0f}, 1e-6f)); + TESTER_CHECK(f32x4_from_f32(3.5f) == F32x4{3.5f, 3.5f, 3.5f, 3.5f}); +} + +// ============================================================================ +// Vectors — F64x2 / F64x3 / F64x4 +// ============================================================================ + +TESTER_TEST("[MATH][F64x2]: ops + dot + length") +{ + F64x2 a = {3.0, 4.0}; + TESTER_CHECK(a + F64x2{1.0, 1.0} == F64x2{4.0, 5.0}); + TESTER_CHECK(-a == F64x2{-3.0, -4.0}); + TESTER_CHECK(a * 2.0 == F64x2{6.0, 8.0}); + TESTER_CHECK(f64x2_dot(a, F64x2{1.0, 2.0}) == 11.0); + TESTER_CHECK(f64_approx_equal(f64x2_length(a), 5.0, 1e-12)); + TESTER_CHECK(f64x2_cross(F64x2{1.0, 0.0}, F64x2{0.0, 1.0}) == 1.0); +} + +TESTER_TEST("[MATH][F64x3]: ops + cross") +{ + F64x3 a = {1.0, 2.0, 3.0}; + F64x3 b = {4.0, 5.0, 6.0}; + TESTER_CHECK(a + b == F64x3{5.0, 7.0, 9.0}); + TESTER_CHECK(f64x3_dot(a, b) == 32.0); + TESTER_CHECK(f64x3_cross(F64X3_RIGHT, F64X3_UP) == F64X3_BACKWARD); + TESTER_CHECK(F64X3_FORWARD == F64x3{0.0, 0.0, -1.0}); + + F64x3 clamp_v = {-2.0, 5.0, 7.0}; + F64x3 clamp_lo = { 0.0, 0.0, 0.0}; + F64x3 clamp_hi = { 3.0, 3.0, 6.0}; + TESTER_CHECK(f64x3_clamp(clamp_v, clamp_lo, clamp_hi) == F64x3{0.0, 3.0, 6.0}); +} + +TESTER_TEST("[MATH][F64x4]: alignment + ops + dot") +{ + TESTER_CHECK(sizeof(F64x4) == 32); + TESTER_CHECK(alignof(F64x4) == 32); + + F64x4 a = {1.0, 2.0, 3.0, 4.0}; + F64x4 b = {5.0, 6.0, 7.0, 8.0}; + TESTER_CHECK(a + b == F64x4{6.0, 8.0, 10.0, 12.0}); + TESTER_CHECK(b - a == F64x4{4.0, 4.0, 4.0, 4.0}); + TESTER_CHECK(a * 2.0 == F64x4{2.0, 4.0, 6.0, 8.0}); + TESTER_CHECK(f64_approx_equal(f64x4_dot(a, b), 70.0, 1e-10)); + + F64x4 v = {2.0, 2.0, 2.0, 2.0}; 
+ TESTER_CHECK(f64_approx_equal(f64x4_length(v), 4.0, 1e-10)); + TESTER_CHECK(f64_approx_equal(f64x4_length(f64x4_normalize(v)), 1.0, 1e-10)); + + TESTER_CHECK(f64x4_min(a, b) == a); + TESTER_CHECK(f64x4_max(a, b) == b); + TESTER_CHECK(f64x4_from_f64(1.5) == F64x4{1.5, 1.5, 1.5, 1.5}); +} + +// ============================================================================ +// Vectors — I32x2 / I32x3 / I32x4 / U32x2 / U32x3 / U32x4 +// ============================================================================ + +TESTER_TEST("[MATH][I32x2]: basic ops") +{ + I32x2 a = {3, -4}; + I32x2 b = {1, 2}; + TESTER_CHECK(a + b == I32x2{4, -2}); + TESTER_CHECK(a - b == I32x2{2, -6}); + TESTER_CHECK(-a == I32x2{-3, 4}); + TESTER_CHECK(a * 2 == I32x2{6, -8}); + TESTER_CHECK(i32x2_dot(a, b) == -5); + TESTER_CHECK(i32x2_length_squared(a) == 25); + TESTER_CHECK(i32x2_abs(a) == I32x2{3, 4}); + TESTER_CHECK(i32x2_min(a, b) == I32x2{1, -4}); + TESTER_CHECK(i32x2_max(a, b) == I32x2{3, 2}); + TESTER_CHECK(i32x2_clamp(I32x2{-5, 10}, I32x2{0, 0}, I32x2{4, 4}) == I32x2{0, 4}); +} + +TESTER_TEST("[MATH][I32x3]: basic ops") +{ + I32x3 a = {1, 2, 3}; + I32x3 b = {4, 5, 6}; + TESTER_CHECK(a + b == I32x3{5, 7, 9}); + TESTER_CHECK(i32x3_dot(a, b) == 32); + TESTER_CHECK(i32x3_abs(I32x3{-1, -2, -3}) == I32x3{1, 2, 3}); + TESTER_CHECK(i32x3_clamp(I32x3{-1, 5, 10}, I32x3{0, 0, 0}, I32x3{3, 3, 3}) == I32x3{0, 3, 3}); +} + +TESTER_TEST("[MATH][I32x4]: SIMD ops + alignment") +{ + TESTER_CHECK(sizeof(I32x4) == 16); + TESTER_CHECK(alignof(I32x4) == 16); + + I32x4 a = {1, 2, 3, 4}; + I32x4 b = {5, 6, 7, 8}; + TESTER_CHECK(a + b == I32x4{6, 8, 10, 12}); + TESTER_CHECK(b - a == I32x4{4, 4, 4, 4}); + TESTER_CHECK(-a == I32x4{-1, -2, -3, -4}); + TESTER_CHECK(a * 3 == I32x4{3, 6, 9, 12}); + TESTER_CHECK(i32x4_dot(a, b) == 70); + TESTER_CHECK(i32x4_abs(I32x4{-1, 2, -3, 4}) == I32x4{1, 2, 3, 4}); + TESTER_CHECK(i32x4_min(a, b) == a); + TESTER_CHECK(i32x4_max(a, b) == b); + TESTER_CHECK(i32x4_clamp(I32x4{-5, 0, 5, 
20}, I32x4{0, 0, 0, 0}, I32x4{10, 10, 10, 10}) == I32x4{0, 0, 5, 10}); + TESTER_CHECK(i32x4_from_i32(7) == I32x4{7, 7, 7, 7}); +} + +TESTER_TEST("[MATH][U32x2]: basic ops") +{ + U32x2 a = {3u, 4u}; + U32x2 b = {1u, 2u}; + TESTER_CHECK(a + b == U32x2{4u, 6u}); + TESTER_CHECK(a - b == U32x2{2u, 2u}); + TESTER_CHECK(a * 2u == U32x2{6u, 8u}); + TESTER_CHECK(u32x2_dot(a, b) == 11u); + TESTER_CHECK(u32x2_length_squared(a) == 25u); + TESTER_CHECK(u32x2_min(a, b) == U32x2{1u, 2u}); + TESTER_CHECK(u32x2_max(a, b) == U32x2{3u, 4u}); + TESTER_CHECK(u32x2_clamp(U32x2{10u, 5u}, U32x2{0u, 0u}, U32x2{4u, 4u}) == U32x2{4u, 4u}); +} + +TESTER_TEST("[MATH][U32x3]: basic ops") +{ + U32x3 a = {1u, 2u, 3u}; + U32x3 b = {4u, 5u, 6u}; + TESTER_CHECK(a + b == U32x3{5u, 7u, 9u}); + TESTER_CHECK(u32x3_dot(a, b) == 32u); + TESTER_CHECK(u32x3_clamp(U32x3{10u, 2u, 7u}, U32x3{0u, 0u, 0u}, U32x3{5u, 5u, 5u}) == U32x3{5u, 2u, 5u}); +} + +TESTER_TEST("[MATH][U32x4]: SIMD ops + alignment") +{ + TESTER_CHECK(sizeof(U32x4) == 16); + TESTER_CHECK(alignof(U32x4) == 16); + + U32x4 a = {1u, 2u, 3u, 4u}; + U32x4 b = {5u, 6u, 7u, 8u}; + TESTER_CHECK(a + b == U32x4{6u, 8u, 10u, 12u}); + TESTER_CHECK(b - a == U32x4{4u, 4u, 4u, 4u}); + TESTER_CHECK(a * 3u == U32x4{3u, 6u, 9u, 12u}); + TESTER_CHECK(u32x4_dot(a, b) == 70u); + TESTER_CHECK(u32x4_min(a, b) == a); + TESTER_CHECK(u32x4_max(a, b) == b); + TESTER_CHECK(u32x4_clamp(U32x4{20u, 1u, 8u, 3u}, U32x4{0u, 0u, 0u, 0u}, U32x4{5u, 5u, 5u, 5u}) == U32x4{5u, 1u, 5u, 3u}); + TESTER_CHECK(u32x4_from_u32(5u) == U32x4{5u, 5u, 5u, 5u}); +} + +// ============================================================================ +// Matrices — F32x2x2 / F32x3x3 / F32x4x4 / F64 mirror +// ============================================================================ + +TESTER_TEST("[MATH][F32x2x2]: identity + ops") +{ + F32x2x2 I = f32x2x2_identity(); + TESTER_CHECK((I == F32x2x2{1.0f, 0.0f, 0.0f, 1.0f})); + + F32x2x2 A = {1.0f, 2.0f, 3.0f, 4.0f}; + F32x2x2 B = {5.0f, 6.0f, 
7.0f, 8.0f}; + TESTER_CHECK(A + B == F32x2x2{6.0f, 8.0f, 10.0f, 12.0f}); + TESTER_CHECK(A * 2.0f == F32x2x2{2.0f, 4.0f, 6.0f, 8.0f}); + TESTER_CHECK(f32x2x2_transpose(A) == F32x2x2{1.0f, 3.0f, 2.0f, 4.0f}); + TESTER_CHECK(f32x2x2_determinant(A) == -2.0f); + + F32x2x2 Ainv = f32x2x2_inverse(A); + F32x2x2 P = A * Ainv; + TESTER_CHECK(f32_approx_equal(P.m00, 1.0f, 1e-5f)); + TESTER_CHECK(f32_approx_equal(P.m01, 0.0f, 1e-5f)); + TESTER_CHECK(f32_approx_equal(P.m10, 0.0f, 1e-5f)); + TESTER_CHECK(f32_approx_equal(P.m11, 1.0f, 1e-5f)); +} + +TESTER_TEST("[MATH][F32x3x3]: layout + identity + mul + inverse") +{ + // Padded row layout: 48 bytes, matches std140/MSL matrix_float3x3. + TESTER_CHECK(sizeof(F32x3x3) == 48); + TESTER_CHECK(alignof(F32x3x3) == 16); + + F32x3x3 I = f32x3x3_identity(); + TESTER_CHECK(I.m00 == 1.0f && I.m11 == 1.0f && I.m22 == 1.0f); + TESTER_CHECK(I.m01 == 0.0f && I.m10 == 0.0f); + + F32x3 v = {1.0f, 2.0f, 3.0f}; + TESTER_CHECK(v * I == v); + + F32x3x3 A = { + 1.0f, 2.0f, 3.0f, 0.0f, + 0.0f, 1.0f, 4.0f, 0.0f, + 5.0f, 6.0f, 0.0f, 0.0f + }; + F32x3x3 Ainv = f32x3x3_inverse(A); + F32x3x3 P = A * Ainv; + TESTER_CHECK(f32_approx_equal(P.m00, 1.0f, 1e-5f)); + TESTER_CHECK(f32_approx_equal(P.m11, 1.0f, 1e-5f)); + TESTER_CHECK(f32_approx_equal(P.m22, 1.0f, 1e-5f)); + TESTER_CHECK(f32_approx_equal(P.m01, 0.0f, 1e-5f)); +} + +TESTER_TEST("[MATH][F32x4x4]: layout + identity") +{ + TESTER_CHECK(sizeof(F32x4x4) == 64); + TESTER_CHECK(alignof(F32x4x4) == 16); + + F32x4x4 I = f32x4x4_identity(); + F32x4 v = {1.0f, 2.0f, 3.0f, 4.0f}; + TESTER_CHECK(v * I == v); + TESTER_CHECK(I * I == I); +} + +TESTER_TEST("[MATH][F32x4x4]: mat-mat mul known values") +{ + F32x4x4 A = { + 1.0f, 2.0f, 3.0f, 4.0f, + 5.0f, 6.0f, 7.0f, 8.0f, + 9.0f, 10.0f, 11.0f, 12.0f, + 13.0f, 14.0f, 15.0f, 16.0f + }; + // A * identity = A. + TESTER_CHECK(A * f32x4x4_identity() == A); + // identity * A = A. 
+ TESTER_CHECK(f32x4x4_identity() * A == A); +} + +TESTER_TEST("[MATH][F32x4x4]: translation / scaling / rotation") +{ + F32x4 origin = {0.0f, 0.0f, 0.0f, 1.0f}; + + F32x4x4 T = f32x4x4_translation(5.0f, 6.0f, 7.0f); + F32x4 t_applied = origin * T; + TESTER_CHECK(f32_approx_equal(t_applied.x, 5.0f, 1e-5f)); + TESTER_CHECK(f32_approx_equal(t_applied.y, 6.0f, 1e-5f)); + TESTER_CHECK(f32_approx_equal(t_applied.z, 7.0f, 1e-5f)); + + F32x4x4 S = f32x4x4_scaling(2.0f, 3.0f, 4.0f); + F32x4 unit = {1.0f, 1.0f, 1.0f, 1.0f}; + F32x4 s_applied = unit * S; + TESTER_CHECK(f32_approx_equal(s_applied.x, 2.0f, 1e-5f)); + TESTER_CHECK(f32_approx_equal(s_applied.y, 3.0f, 1e-5f)); + TESTER_CHECK(f32_approx_equal(s_applied.z, 4.0f, 1e-5f)); + + // Rotate +X by 90 deg about Y axis → -Z (right-handed). + F32x4 x_axis = {1.0f, 0.0f, 0.0f, 0.0f}; + F32x4 rotated = x_axis * f32x4x4_rotation_y(F32_PI_OVER_2); + TESTER_CHECK(f32_approx_equal(rotated.x, 0.0f, 1e-5f)); + TESTER_CHECK(f32_approx_equal(rotated.z, -1.0f, 1e-5f)); +} + +TESTER_TEST("[MATH][F32x4x4]: inverse round-trip") +{ + F32x4x4 A = { + 2.0f, 5.0f, 0.0f, 8.0f, + 1.0f, 4.0f, 2.0f, 6.0f, + 7.0f, 8.0f, 8.0f, 3.0f, + 1.0f, 5.0f, 7.0f, 8.0f + }; + TESTER_CHECK(f32x4x4_is_invertible(A)); + F32x4x4 Ainv = f32x4x4_inverse(A); + F32x4x4 P = A * Ainv; + // P should be identity within 1e-4 tolerance. + TESTER_CHECK(f32x4x4_approx_equal(P, f32x4x4_identity(), 1e-4f)); +} + +TESTER_TEST("[MATH][F32x4x4]: look_at + canonical convention") +{ + // Camera at (0, 0, 5) looking at origin, Y up. Target (origin) should map to -Z in view space. + F32x4x4 view = f32x4x4_look_at(F32x3{0.0f, 0.0f, 5.0f}, F32X3_ZERO, F32X3_UP); + F32x4 origin = {0.0f, 0.0f, 0.0f, 1.0f}; + F32x4 view_pos = origin * view; + // Origin is 5 units along -Z in view space. 
+ TESTER_CHECK(f32_approx_equal(view_pos.z, -5.0f, 1e-4f)); + TESTER_CHECK(f32_approx_equal(view_pos.x, 0.0f, 1e-4f)); + TESTER_CHECK(f32_approx_equal(view_pos.y, 0.0f, 1e-4f)); +} + +TESTER_TEST("[MATH][F32x4x4]: perspective canonical NDC") +{ + F32x4x4 P = f32x4x4_perspective(F32_PI_OVER_2, 1.0f, 0.1f, 100.0f); + + // Point at -znear maps to clip.z / clip.w = 0 (near plane -> NDC z = 0). + F32x4 near_pt = F32x4{0.0f, 0.0f, -0.1f, 1.0f} * P; + TESTER_CHECK(f32_approx_equal(near_pt.z / near_pt.w, 0.0f, 1e-4f)); + + // Point at -zfar maps to NDC z = 1. + F32x4 far_pt = F32x4{0.0f, 0.0f, -100.0f, 1.0f} * P; + TESTER_CHECK(f32_approx_equal(far_pt.z / far_pt.w, 1.0f, 1e-4f)); +} + +TESTER_TEST("[MATH][F32x4x4]: orthographic canonical NDC") +{ + F32x4x4 P = f32x4x4_orthographic(-1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 1.0f); + + // Point (-1, -1, 0) (view space near-plane lower-left) -> NDC (-1, -1, 0). + F32x4 near_ll = F32x4{-1.0f, -1.0f, 0.0f, 1.0f} * P; + TESTER_CHECK(f32_approx_equal(near_ll.x, -1.0f, 1e-5f)); + TESTER_CHECK(f32_approx_equal(near_ll.y, -1.0f, 1e-5f)); + TESTER_CHECK(f32_approx_equal(near_ll.z, 0.0f, 1e-5f)); + + // Point (1, 1, -1) (view space far-plane upper-right) -> NDC (1, 1, 1). 
+ F32x4 far_ur = F32x4{1.0f, 1.0f, -1.0f, 1.0f} * P; + TESTER_CHECK(f32_approx_equal(far_ur.x, 1.0f, 1e-5f)); + TESTER_CHECK(f32_approx_equal(far_ur.y, 1.0f, 1e-5f)); + TESTER_CHECK(f32_approx_equal(far_ur.z, 1.0f, 1e-5f)); +} + +TESTER_TEST("[MATH][F32x4x4]: project / unproject round-trip") +{ + F32x4x4 view = f32x4x4_look_at(F32x3{0.0f, 0.0f, 5.0f}, F32X3_ZERO, F32X3_UP); + F32x4x4 proj = f32x4x4_perspective(F32_PI_OVER_2, 16.0f / 9.0f, 0.1f, 100.0f); + F32x4x4 vp = view * proj; + F32x4x4 vp_inv = f32x4x4_inverse(vp); + F32x4 viewport = {0.0f, 0.0f, 1920.0f, 1080.0f}; + + F32x3 world = {1.0f, 0.5f, 0.0f}; + F32x3 screen = f32x3_project(world, vp, viewport); + F32x3 back = f32x3_unproject(screen, vp_inv, viewport); + + TESTER_CHECK(f32x3_approx_equal(world, back, 1e-3f)); +} + +TESTER_TEST("[MATH][F64x4x4]: layout + identity + mul") +{ + TESTER_CHECK(sizeof(F64x4x4) == 128); + TESTER_CHECK(alignof(F64x4x4) == 32); + + F64x4x4 I = f64x4x4_identity(); + F64x4 v = {1.0, 2.0, 3.0, 4.0}; + TESTER_CHECK(v * I == v); + TESTER_CHECK(I * I == I); + + F64x4x4 A = { + 1.0, 2.0, 3.0, 4.0, + 5.0, 6.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0, + 13.0, 14.0, 15.0, 16.0 + }; + TESTER_CHECK(A * f64x4x4_identity() == A); +} + +TESTER_TEST("[MATH][F64x3x3]: layout + identity + inverse") +{ + TESTER_CHECK(sizeof(F64x3x3) == 96); + TESTER_CHECK(alignof(F64x3x3) == 32); + + F64x3x3 A = { + 1.0, 2.0, 3.0, 0.0, + 0.0, 1.0, 4.0, 0.0, + 5.0, 6.0, 0.0, 0.0 + }; + F64x3x3 Ainv = f64x3x3_inverse(A); + F64x3x3 P = A * Ainv; + TESTER_CHECK(f64_approx_equal(P.m00, 1.0, 1e-10)); + TESTER_CHECK(f64_approx_equal(P.m11, 1.0, 1e-10)); + TESTER_CHECK(f64_approx_equal(P.m22, 1.0, 1e-10)); +} + +TESTER_TEST("[MATH][F64x2x2]: ops") +{ + F64x2x2 I = f64x2x2_identity(); + F64x2 v = {3.0, 4.0}; + TESTER_CHECK(v * I == v); + TESTER_CHECK(f64x2x2_determinant(F64x2x2{1.0, 2.0, 3.0, 4.0}) == -2.0); +} + +// ============================================================================ +// Quaternion +// 
============================================================================ + +TESTER_TEST("[MATH][Quaternion]: identity + operators") +{ + Quaternion I = quaternion_identity(); + TESTER_CHECK(I == QUATERNION_IDENTITY); + TESTER_CHECK((I == Quaternion{1.0f, 0.0f, 0.0f, 0.0f})); + + Quaternion p = {1.0f, 2.0f, 3.0f, 4.0f}; + Quaternion q = {5.0f, 6.0f, 7.0f, 8.0f}; + TESTER_CHECK((p + q == Quaternion{6.0f, 8.0f, 10.0f, 12.0f})); + TESTER_CHECK((-p == Quaternion{-1.0f, -2.0f, -3.0f, -4.0f})); + TESTER_CHECK((p * 2.0f == Quaternion{2.0f, 4.0f, 6.0f, 8.0f})); + TESTER_CHECK(f32_approx_equal(quaternion_dot(p, q), 70.0f, 1e-5f)); +} + +TESTER_TEST("[MATH][Quaternion]: from_axis_angle + length") +{ + // 0-angle about any axis is identity. + TESTER_CHECK(quaternion_approx_equal(quaternion_from_axis_angle(F32X3_UP, 0.0f), QUATERNION_IDENTITY, 1e-6f)); + + // 180° about X: (0, 1, 0, 0). + Quaternion r = quaternion_from_axis_angle(F32X3_RIGHT, F32_PI); + TESTER_CHECK(f32_approx_equal(r.w, 0.0f, 1e-6f)); + TESTER_CHECK(f32_approx_equal(r.x, 1.0f, 1e-6f)); + + // All unit-axis rotations produce unit quaternions. + for (F32 theta : {0.5f, 1.0f, 1.5f, 2.0f, 3.0f}) + { + Quaternion q = quaternion_from_axis_angle(F32X3_UP, theta); + TESTER_CHECK(f32_approx_equal(quaternion_length(q), 1.0f, 1e-6f)); + } +} + +TESTER_TEST("[MATH][Quaternion]: inverse round-trip") +{ + Quaternion q = quaternion_normalize(Quaternion{1.0f, 2.0f, 3.0f, 4.0f}); + Quaternion inv = quaternion_inverse(q); + Quaternion p = q * inv; + TESTER_CHECK(quaternion_approx_equal(p, QUATERNION_IDENTITY, 1e-5f)); +} + +TESTER_TEST("[MATH][Quaternion]: vector rotation by quaternion") +{ + // 90° about +Y: rotates +X to -Z (right-handed). + Quaternion q = quaternion_from_axis_angle(F32X3_UP, F32_PI_OVER_2); + F32x3 rotated = F32X3_RIGHT * q; + TESTER_CHECK(f32x3_approx_equal(rotated, F32X3_FORWARD, 1e-5f)); + + // Rotating +Y about +Y leaves it unchanged. 
+ F32x3 same = F32X3_UP * q; + TESTER_CHECK(f32x3_approx_equal(same, F32X3_UP, 1e-5f)); + + // Identity rotation leaves vectors unchanged. + TESTER_CHECK(f32x3_approx_equal(F32X3_RIGHT * QUATERNION_IDENTITY, F32X3_RIGHT, 1e-6f)); +} + +TESTER_TEST("[MATH][Quaternion]: from_euler_angles is right-handed") +{ + // Euler (pitch=0, yaw=pi/2, roll=0): rotates +X to -Z (right-handed about +Y). + // This is the test that used to fail under the old left-handed implementation. + Quaternion q = quaternion_from_euler_angles(F32x3{0.0f, F32_PI_OVER_2, 0.0f}); + F32x3 rotated = F32X3_RIGHT * q; + TESTER_CHECK(f32x3_approx_equal(rotated, F32X3_FORWARD, 1e-5f)); + + // Round-trip through to_euler_angles. + F32x3 angles = F32x3{0.3f, 0.5f, -0.7f}; + Quaternion q2 = quaternion_from_euler_angles(angles); + F32x3 recovered = quaternion_to_euler_angles(q2); + TESTER_CHECK(f32x3_approx_equal(recovered, angles, 1e-4f)); +} + +TESTER_TEST("[MATH][Quaternion]: slerp endpoints + midpoint") +{ + Quaternion a = quaternion_from_axis_angle(F32X3_UP, 0.0f); + Quaternion b = quaternion_from_axis_angle(F32X3_UP, F32_PI_OVER_2); + + TESTER_CHECK(quaternion_approx_equal(quaternion_slerp(a, b, 0.0f), a, 1e-5f)); + TESTER_CHECK(quaternion_approx_equal(quaternion_slerp(a, b, 1.0f), b, 1e-5f)); + + // Midpoint is 45° about +Y. + Quaternion mid = quaternion_slerp(a, b, 0.5f); + Quaternion expected = quaternion_from_axis_angle(F32X3_UP, F32_PI_OVER_2 * 0.5f); + TESTER_CHECK(quaternion_approx_equal(mid, expected, 1e-5f)); +} + +TESTER_TEST("[MATH][Quaternion]: from_to_rotation") +{ + // +X to +Y: 90° about +Z. + Quaternion q = quaternion_from_to_rotation(F32X3_RIGHT, F32X3_UP); + F32x3 rotated = F32X3_RIGHT * q; + TESTER_CHECK(f32x3_approx_equal(rotated, F32X3_UP, 1e-5f)); + + // Identity case: +X to +X. + Quaternion ident = quaternion_from_to_rotation(F32X3_RIGHT, F32X3_RIGHT); + TESTER_CHECK(quaternion_approx_equal(ident, QUATERNION_IDENTITY, 1e-4f)); + + // Anti-parallel: +X to -X. 
Rotates 180° about some perpendicular axis. + Quaternion flip = quaternion_from_to_rotation(F32X3_RIGHT, F32X3_LEFT); + F32x3 flipped = F32X3_RIGHT * flip; + TESTER_CHECK(f32x3_approx_equal(flipped, F32X3_LEFT, 1e-4f)); +} + +TESTER_TEST("[MATH][Quaternion]: look_rotation") +{ + // Looking down -Z with +Y up → identity orientation. + Quaternion q = quaternion_look_rotation(F32X3_FORWARD, F32X3_UP); + TESTER_CHECK(quaternion_approx_equal(q, QUATERNION_IDENTITY, 1e-4f)); + + // Looking down +X with +Y up → 90° about +Y. + Quaternion q2 = quaternion_look_rotation(F32X3_RIGHT, F32X3_UP); + F32x3 forward_of_q2 = F32X3_FORWARD * q2; + TESTER_CHECK(f32x3_approx_equal(forward_of_q2, F32X3_RIGHT, 1e-4f)); +} + +TESTER_TEST("[MATH][Quaternion]: rotate_towards clamps step") +{ + Quaternion a = quaternion_identity(); + // 120° about UP — unambiguous shortest arc (180° has ambiguous direction). + Quaternion b = quaternion_from_axis_angle(F32X3_UP, F32_PI * 2.0f / 3.0f); + F32 full_angle = F32_PI * 2.0f / 3.0f; + + // Small max step — doesn't reach target. + Quaternion stepped = quaternion_rotate_towards(a, b, 0.1f); + TESTER_CHECK(!quaternion_approx_equal(stepped, b, 1e-3f)); + + // Large max step (> angular distance) — snaps to target. + Quaternion snapped = quaternion_rotate_towards(a, b, F32_PI * 2.0f); + TESTER_CHECK(quaternion_approx_equal(snapped, b, 1e-5f)); + + // Mid-range step — 60° about UP. Compare via a probe vector (robust + // against q / -q double cover that field-wise comparison would trip on). 
+ Quaternion half = quaternion_rotate_towards(a, b, full_angle * 0.5f); + F32x3 via_half = F32X3_RIGHT * half; + F32x3 via_expected = F32X3_RIGHT * quaternion_from_axis_angle(F32X3_UP, full_angle * 0.5f); + TESTER_CHECK(f32x3_approx_equal(via_half, via_expected, 1e-4f)); +} + +TESTER_TEST("[MATH][Quaternion]: f32x4x4_from_quaternion agrees with vector rotation") +{ + Quaternion q = quaternion_from_axis_angle(F32X3_UP, F32_PI_OVER_2); + F32x4x4 M = f32x4x4_from_quaternion(q); + + // Rotating +X via matrix and via quaternion should match. + F32x3 via_quat = F32X3_RIGHT * q; + F32x4 via_matrix = F32x4{1.0f, 0.0f, 0.0f, 0.0f} * M; + + TESTER_CHECK(f32_approx_equal(via_quat.x, via_matrix.x, 1e-5f)); + TESTER_CHECK(f32_approx_equal(via_quat.y, via_matrix.y, 1e-5f)); + TESTER_CHECK(f32_approx_equal(via_quat.z, via_matrix.z, 1e-5f)); +} + +TESTER_TEST("[MATH][F32x4x4]: decompose TRS round-trip") +{ + F32x3 T = {5.0f, 6.0f, 7.0f}; + Quaternion R = quaternion_from_axis_angle(f32x3_normalize(F32x3{1.0f, 1.0f, 1.0f}), 1.2f); + F32x3 S = {2.0f, 3.0f, 4.0f}; + + F32x4x4 M = f32x4x4_scaling(S) * f32x4x4_from_quaternion(R) * f32x4x4_translation(T); + + F32x3 out_t; + Quaternion out_r; + F32x3 out_s; + TESTER_CHECK(f32x4x4_decompose(M, &out_t, &out_r, &out_s)); + + TESTER_CHECK(f32x3_approx_equal(out_t, T, 1e-4f)); + TESTER_CHECK(f32x3_approx_equal(out_s, S, 1e-4f)); + // Quaternions ±q represent the same rotation — check by applying to a probe vector. + F32x3 probe = f32x3_normalize(F32x3{0.3f, -0.4f, 0.8f}); + TESTER_CHECK(f32x3_approx_equal(probe * R, probe * out_r, 1e-4f)); +} + +TESTER_TEST("[MATH][F32x4x4]: inverse round-trip M * inv(M) == I") +{ + // TRS-composed matrix (well-conditioned, invertible). 
+ F32x3 T = {1.5f, -2.3f, 4.7f}; + Quaternion R = quaternion_from_axis_angle(f32x3_normalize(F32x3{0.3f, 0.8f, -0.2f}), 0.9f); + F32x3 S = {2.0f, 0.5f, 3.0f}; + F32x4x4 M = f32x4x4_scaling(S) * f32x4x4_from_quaternion(R) * f32x4x4_translation(T); + F32x4x4 Minv = f32x4x4_inverse(M); + + TESTER_CHECK(f32x4x4_is_invertible(M)); + + F32x4x4 I = f32x4x4_identity(); + F32x4x4 prod_L = M * Minv; + F32x4x4 prod_R = Minv * M; + for (I32 i = 0; i < 16; i++) + { + TESTER_CHECK(f32_approx_equal(f32x4x4_at(prod_L, i), f32x4x4_at(I, i), 1e-4f)); + TESTER_CHECK(f32_approx_equal(f32x4x4_at(prod_R, i), f32x4x4_at(I, i), 1e-4f)); + } + + // Singular matrix — determinant 0, not invertible. + F32x4x4 singular = F32x4x4{}; // all zeros + TESTER_CHECK(!f32x4x4_is_invertible(singular)); +} + +// ============================================================================ +// Random (xoshiro256**) +// ============================================================================ + +TESTER_TEST("[MATH][Random]: seed determinism") +{ + Random a, b; + random_seed(a, 42); + random_seed(b, 42); + for (int i = 0; i < 1000; ++i) + TESTER_CHECK(random_u64(a) == random_u64(b)); + + // Different seed → different stream (first 10 draws). 
+ Random c, d; + random_seed(c, 1); + random_seed(d, 2); + bool any_diff = false; + for (int i = 0; i < 10; ++i) + if (random_u64(c) != random_u64(d)) { any_diff = true; break; } + TESTER_CHECK(any_diff); +} + +TESTER_TEST("[MATH][Random]: f32_random_unit in [0, 1)") +{ + Random rng; + random_seed(rng, 12345); + for (int i = 0; i < 10000; ++i) + { + F32 x = f32_random_unit(rng); + TESTER_CHECK(x >= 0.0f); + TESTER_CHECK(x < 1.0f); + } +} + +TESTER_TEST("[MATH][Random]: f32_random_range bounds") +{ + Random rng; + random_seed(rng, 0xABCDEF); + for (int i = 0; i < 10000; ++i) + { + F32 x = f32_random_range(rng, -5.0f, 5.0f); + TESTER_CHECK(x >= -5.0f); + TESTER_CHECK(x < 5.0f); + } +} + +TESTER_TEST("[MATH][Random]: i32_random_range inclusive bounds") +{ + Random rng; + random_seed(rng, 777); + I32 observed_min = 1000; + I32 observed_max = -1000; + for (int i = 0; i < 10000; ++i) + { + I32 v = i32_random_range(rng, -3, 5); + TESTER_CHECK(v >= -3); + TESTER_CHECK(v <= 5); + if (v < observed_min) observed_min = v; + if (v > observed_max) observed_max = v; + } + // With 10000 draws across 9 bins we should hit both endpoints. 
+ TESTER_CHECK(observed_min == -3); + TESTER_CHECK(observed_max == 5); +} + +TESTER_TEST("[MATH][Random]: f32x3_random_in_unit_sphere / on_unit_sphere") +{ + Random rng; + random_seed(rng, 99); + + for (int i = 0; i < 1000; ++i) + { + F32x3 in_sphere = f32x3_random_in_unit_sphere(rng); + TESTER_CHECK(f32x3_length_squared(in_sphere) <= 1.0f + 1e-5f); + + F32x3 on_sphere = f32x3_random_on_unit_sphere(rng); + TESTER_CHECK(f32_approx_equal(f32x3_length(on_sphere), 1.0f, 1e-5f)); + } +} + +TESTER_TEST("[MATH][Random]: f32x2_random_in_unit_disk") +{ + Random rng; + random_seed(rng, 0xBEEF); + for (int i = 0; i < 1000; ++i) + { + F32x2 p = f32x2_random_in_unit_disk(rng); + TESTER_CHECK(f32x2_length_squared(p) <= 1.0f + 1e-5f); + } +} + +TESTER_TEST("[MATH][Random]: quaternion_random produces unit quaternions") +{ + Random rng; + random_seed(rng, 2026); + for (int i = 0; i < 1000; ++i) + { + Quaternion q = quaternion_random(rng); + TESTER_CHECK(f32_approx_equal(quaternion_length(q), 1.0f, 1e-5f)); + } +} + +// ============================================================================ +// Formatter smoke tests — make sure every math type round-trips through +// Core's formatter without compile errors. +// ============================================================================ + +TESTER_TEST("[MATH][format]: vectors + matrix + quaternion") +{ + Formatter f = formatter_init(); + DEFER(formatter_deinit(f)); + + // F32x3 non-empty string produced. + formatter_clear(f); + format(f, F32x3{1.0f, 2.0f, 3.0f}); + TESTER_CHECK(f.buffer.count > 0); + + // F32x4x4 non-empty string produced. + formatter_clear(f); + format(f, f32x4x4_identity()); + TESTER_CHECK(f.buffer.count > 0); + + // Quaternion formatter fires. + formatter_clear(f); + format(f, QUATERNION_IDENTITY); + TESTER_CHECK(f.buffer.count > 0); + + // Integer vector formatter fires. 
+ formatter_clear(f); + format(f, U32x2{10u, 20u}); + TESTER_CHECK(f.buffer.count > 0); +} diff --git a/unittest/src/unittest_platform.cpp b/unittest/src/unittest_platform.cpp index 8046f90f..8eabac4b 100644 --- a/unittest/src/unittest_platform.cpp +++ b/unittest/src/unittest_platform.cpp @@ -18,33 +18,33 @@ TESTER_TEST("[PLATFORM] memory") TESTER_TEST("[PLATFORM] file") { - u32 write_data[1024] = {}; - for (u32 i = 0; i < 1024; ++i) + U32 write_data[1024] = {}; + for (U32 i = 0; i < 1024; ++i) write_data[i] = i; Platform_Memory write_mem = {}; - write_mem.ptr = (u8 *)write_data; + write_mem.ptr = (U8 *)write_data; write_mem.size = sizeof(write_data); const char *filepath = "test.platform"; - u64 written_size = platform_file_write(filepath, write_mem); + U64 written_size = platform_file_write(filepath, write_mem); TESTER_CHECK(written_size == write_mem.size); - u64 file_size = platform_file_size(filepath); + U64 file_size = platform_file_size(filepath); TESTER_CHECK(file_size == write_mem.size); - u32 read_data[1024] = {}; + U32 read_data[1024] = {}; Platform_Memory read_mem = {}; - read_mem.ptr = (u8 *)read_data; + read_mem.ptr = (U8 *)read_data; read_mem.size = sizeof(read_data); - u64 read_size = platform_file_read(filepath, read_mem); + U64 read_size = platform_file_read(filepath, read_mem); TESTER_CHECK(read_size == written_size); TESTER_CHECK(read_size == read_mem.size); bool same = true; - for (u32 i = 0; i < 1024; ++i) + for (U32 i = 0; i < 1024; ++i) { if (read_data[i] != write_data[i]) { @@ -62,7 +62,7 @@ TESTER_TEST("[PLATFORM] file") TESTER_CHECK(read_size == read_mem.size); same = true; - for (u32 i = 0; i < 1024; ++i) + for (U32 i = 0; i < 1024; ++i) { if (read_data[i] != write_data[i]) { @@ -80,11 +80,11 @@ TESTER_TEST("[PLATFORM] file") TESTER_TEST("[PLATFORM] time") { // platform_sleep_set_period(1); - // u64 begin_time = platform_query_microseconds(); + // U64 begin_time = platform_query_microseconds(); // platform_sleep(16); - // u64 end_time = 
platform_query_microseconds(); + // U64 end_time = platform_query_microseconds(); - // f32 delta_time = (end_time - begin_time) * MICROSECOND_TO_MILLISECOND; + // F32 delta_time = (end_time - begin_time) * MICROSECOND_TO_MILLISECOND; // TESTER_CHECK(delta_time == doctest::Approx(16).epsilon(0.25)); } \ No newline at end of file diff --git a/unittest/src/unittest_reflect.cpp b/unittest/src/unittest_reflect.cpp index 807ac1f3..487b8bd1 100644 --- a/unittest/src/unittest_reflect.cpp +++ b/unittest/src/unittest_reflect.cpp @@ -6,7 +6,7 @@ #include #include -enum REFLECT +enum REFLECT : int { REFLECT_ENUM_0, REFLECT_ENUM_1, @@ -17,14 +17,14 @@ enum REFLECT REFLECT_ENUM_6, }; -enum class ENUM_CLASS +enum class ENUM_CLASS : int { NEG_ONE = -1, ZERO, ONE }; -enum ENUM_WITH_FLAGS +enum ENUM_WITH_FLAGS : int { ONE = 1 << 0, TWO = 1 << 1, @@ -34,7 +34,7 @@ enum ENUM_WITH_FLAGS TYPE_OF_ENUM(ENUM_WITH_FLAGS, ONE, TWO, FOUR, TWO_POWER_16) -enum UNORDERED_ENUM +enum UNORDERED_ENUM : int { UNORDERED_ENUM_ONE = 1, UNORDERED_ENUM_MINUS_ONE = -1, @@ -43,14 +43,14 @@ enum UNORDERED_ENUM TYPE_OF_ENUM(UNORDERED_ENUM, UNORDERED_ENUM_ONE, UNORDERED_ENUM_MINUS_ONE, UNORDERED_ENUM_ZERO) -enum EMPTY_ENUM +enum EMPTY_ENUM : int { }; TYPE_OF_ENUM(EMPTY_ENUM) -enum ENUM_WITH_SAME_VALUES +enum ENUM_WITH_SAME_VALUES : int { ZERO, ONE_MINUS_ONE = 0 @@ -67,7 +67,7 @@ TYPE_OF(Empty) struct Vector3 { - f32 x, y, z; + F32 x, y, z; }; TYPE_OF(Vector3, x, y, z) @@ -128,44 +128,44 @@ TESTER_TEST("[CORE]: Reflect") { // ("name_of primitives") { - auto i08_name = name_of(); - auto i16_name = name_of(); - auto i32_name = name_of(); - auto i64_name = name_of(); + auto i08_name = name_of(); + auto i16_name = name_of(); + auto i32_name = name_of(); + auto i64_name = name_of(); - auto u08_name = name_of(); - auto u16_name = name_of(); - auto u32_name = name_of(); - auto u64_name = name_of(); + auto u08_name = name_of(); + auto u16_name = name_of(); + auto u32_name = name_of(); + auto u64_name = name_of(); - 
auto f32_name = name_of(); - auto f64_name = name_of(); + auto f32_name = name_of(); + auto f64_name = name_of(); auto bool_name = name_of(); auto char_name = name_of(); auto void_name = name_of(); - auto const_i08_name = name_of(); - auto const_i16_name = name_of(); - auto const_i32_name = name_of(); - auto const_i64_name = name_of(); + auto const_i08_name = name_of(); + auto const_i16_name = name_of(); + auto const_i32_name = name_of(); + auto const_i64_name = name_of(); - auto const_u08_name = name_of(); - auto const_u16_name = name_of(); - auto const_u32_name = name_of(); - auto const_u64_name = name_of(); + auto const_u08_name = name_of(); + auto const_u16_name = name_of(); + auto const_u32_name = name_of(); + auto const_u64_name = name_of(); - auto const_f32_name = name_of(); - auto const_f64_name = name_of(); + auto const_f32_name = name_of(); + auto const_f64_name = name_of(); auto const_bool_name = name_of(); auto const_char_name = name_of(); auto const_void_name = name_of(); - auto i08_ref_name = name_of(); - auto const_i08_ref_name = name_of(); + auto i08_ref_name = name_of(); + auto const_i08_ref_name = name_of(); TESTER_CHECK(string_literal(i08_name) == "i8"); TESTER_CHECK(string_literal(i16_name) == "i16"); @@ -227,10 +227,10 @@ TESTER_TEST("[CORE]: Reflect") auto const_vec3_ptr_const_ref_name = name_of(); TESTER_CHECK(string_literal(const_vec3_ptr_const_ref_name) == "const Vector3* const&"); - auto const_point_ref_const_i32_ptr_const_ref_name = name_of &>(); + auto const_point_ref_const_i32_ptr_const_ref_name = name_of &>(); TESTER_CHECK(string_literal(const_point_ref_const_i32_ptr_const_ref_name) == "const Point&"); - auto const_point_const_i32_ptr_name = name_of>(); + auto const_point_const_i32_ptr_name = name_of>(); TESTER_CHECK(string_literal(const_point_const_i32_ptr_name) == "const Point"); } @@ -245,18 +245,18 @@ TESTER_TEST("[CORE]: Reflect") // ("name_of template struct") { - auto point_i08_name = name_of>(); - auto point_i16_name = 
name_of>(); - auto point_i32_name = name_of>(); - auto point_i64_name = name_of>(); + auto point_i08_name = name_of>(); + auto point_i16_name = name_of>(); + auto point_i32_name = name_of>(); + auto point_i64_name = name_of>(); - auto point_u08_name = name_of>(); - auto point_u16_name = name_of>(); - auto point_u32_name = name_of>(); - auto point_u64_name = name_of>(); + auto point_u08_name = name_of>(); + auto point_u16_name = name_of>(); + auto point_u32_name = name_of>(); + auto point_u64_name = name_of>(); - auto point_f32_name = name_of>(); - auto point_f64_name = name_of>(); + auto point_f32_name = name_of>(); + auto point_f64_name = name_of>(); auto point_bool_name = name_of>(); auto point_char_name = name_of>(); @@ -266,15 +266,15 @@ TESTER_TEST("[CORE]: Reflect") auto point_vec3_name = name_of>(); auto point_const_vec3_name = name_of>(); - auto foo_i32_f32_name = name_of>(); - auto bar_i32_f32_vec3_name = name_of>(); - auto bar_const_i32_const_f32_const_vec3_name = name_of>(); - auto bar_const_point_const_i32_const_f32_const_vec3_name = name_of, const f32, const Vector3>>(); + auto foo_i32_f32_name = name_of>(); + auto bar_i32_f32_vec3_name = name_of>(); + auto bar_const_i32_const_f32_const_vec3_name = name_of>(); + auto bar_const_point_const_i32_const_f32_const_vec3_name = name_of, const F32, const Vector3>>(); - auto point_nested = name_of>>>>>(); - auto point_nested_ptr = name_of *> *> *> *> *>(); - auto point_nested_ptr2 = name_of *> *> *> *> *>(); - auto point_nested_const_ptr_const = name_of * const>>(); + auto point_nested = name_of>>>>>(); + auto point_nested_ptr = name_of *> *> *> *> *>(); + auto point_nested_ptr2 = name_of *> *> *> *> *>(); + auto point_nested_const_ptr_const = name_of * const>>(); TESTER_CHECK(string_literal(point_i08_name) == "Point"); TESTER_CHECK(string_literal(point_i16_name) == "Point"); @@ -309,37 +309,37 @@ TESTER_TEST("[CORE]: Reflect") // ("kind_of") { - TESTER_CHECK(kind_of() == TYPE_KIND_I8); - TESTER_CHECK(kind_of() 
== TYPE_KIND_I8); - TESTER_CHECK(kind_of() == TYPE_KIND_I8); + TESTER_CHECK(kind_of() == TYPE_KIND_I8); + TESTER_CHECK(kind_of() == TYPE_KIND_I8); + TESTER_CHECK(kind_of() == TYPE_KIND_I8); - TESTER_CHECK(kind_of() == TYPE_KIND_I16); - TESTER_CHECK(kind_of() == TYPE_KIND_I16); - TESTER_CHECK(kind_of() == TYPE_KIND_I16); + TESTER_CHECK(kind_of() == TYPE_KIND_I16); + TESTER_CHECK(kind_of() == TYPE_KIND_I16); + TESTER_CHECK(kind_of() == TYPE_KIND_I16); - TESTER_CHECK(kind_of() == TYPE_KIND_I32); - TESTER_CHECK(kind_of() == TYPE_KIND_I32); - TESTER_CHECK(kind_of() == TYPE_KIND_I32); + TESTER_CHECK(kind_of() == TYPE_KIND_I32); + TESTER_CHECK(kind_of() == TYPE_KIND_I32); + TESTER_CHECK(kind_of() == TYPE_KIND_I32); - TESTER_CHECK(kind_of() == TYPE_KIND_I64); - TESTER_CHECK(kind_of() == TYPE_KIND_I64); - TESTER_CHECK(kind_of() == TYPE_KIND_I64); + TESTER_CHECK(kind_of() == TYPE_KIND_I64); + TESTER_CHECK(kind_of() == TYPE_KIND_I64); + TESTER_CHECK(kind_of() == TYPE_KIND_I64); - TESTER_CHECK(kind_of() == TYPE_KIND_U8); - TESTER_CHECK(kind_of() == TYPE_KIND_U8); - TESTER_CHECK(kind_of() == TYPE_KIND_U8); + TESTER_CHECK(kind_of() == TYPE_KIND_U8); + TESTER_CHECK(kind_of() == TYPE_KIND_U8); + TESTER_CHECK(kind_of() == TYPE_KIND_U8); - TESTER_CHECK(kind_of() == TYPE_KIND_U16); - TESTER_CHECK(kind_of() == TYPE_KIND_U16); - TESTER_CHECK(kind_of() == TYPE_KIND_U16); + TESTER_CHECK(kind_of() == TYPE_KIND_U16); + TESTER_CHECK(kind_of() == TYPE_KIND_U16); + TESTER_CHECK(kind_of() == TYPE_KIND_U16); - TESTER_CHECK(kind_of() == TYPE_KIND_U32); - TESTER_CHECK(kind_of() == TYPE_KIND_U32); - TESTER_CHECK(kind_of() == TYPE_KIND_U32); + TESTER_CHECK(kind_of() == TYPE_KIND_U32); + TESTER_CHECK(kind_of() == TYPE_KIND_U32); + TESTER_CHECK(kind_of() == TYPE_KIND_U32); - TESTER_CHECK(kind_of() == TYPE_KIND_U64); - TESTER_CHECK(kind_of() == TYPE_KIND_U64); - TESTER_CHECK(kind_of() == TYPE_KIND_U64); + TESTER_CHECK(kind_of() == TYPE_KIND_U64); + TESTER_CHECK(kind_of() == TYPE_KIND_U64); + 
TESTER_CHECK(kind_of() == TYPE_KIND_U64); TESTER_CHECK(kind_of() == TYPE_KIND_BOOL); TESTER_CHECK(kind_of() == TYPE_KIND_BOOL); @@ -369,29 +369,29 @@ TESTER_TEST("[CORE]: Reflect") // ("type_of primitives") { - i32 i32_v = -1; - const Type *i32_type = type_of(); + I32 i32_v = -1; + const Type *i32_type = type_of(); TESTER_CHECK(i32_type == type_of(i32_v)); TESTER_CHECK(string_literal(i32_type->name) == "i32"); TESTER_CHECK(i32_type->kind == TYPE_KIND_I32); - TESTER_CHECK(i32_type->size == sizeof(i32)); - TESTER_CHECK(i32_type->align == alignof(i32)); + TESTER_CHECK(i32_type->size == sizeof(I32)); + TESTER_CHECK(i32_type->align == alignof(I32)); - u32 u32_v = 1; - const Type *u32_type = type_of(); + U32 u32_v = 1; + const Type *u32_type = type_of(); TESTER_CHECK(u32_type == type_of(u32_v)); TESTER_CHECK(string_literal(u32_type->name) == "u32"); TESTER_CHECK(u32_type->kind == TYPE_KIND_U32); - TESTER_CHECK(u32_type->size == sizeof(u32)); - TESTER_CHECK(u32_type->align == alignof(u32)); + TESTER_CHECK(u32_type->size == sizeof(U32)); + TESTER_CHECK(u32_type->align == alignof(U32)); - f32 f32_v = 1.0f; - const Type *f32_type = type_of(); + F32 f32_v = 1.0f; + const Type *f32_type = type_of(); TESTER_CHECK(f32_type == type_of(f32_v)); TESTER_CHECK(string_literal(f32_type->name) == "f32"); TESTER_CHECK(f32_type->kind == TYPE_KIND_F32); - TESTER_CHECK(f32_type->size == sizeof(f32)); - TESTER_CHECK(f32_type->align == alignof(f32)); + TESTER_CHECK(f32_type->size == sizeof(F32)); + TESTER_CHECK(f32_type->align == alignof(F32)); bool bool_v = true; const Type *bool_type = type_of(); @@ -441,24 +441,24 @@ TESTER_TEST("[CORE]: Reflect") TESTER_CHECK(field_x.offset == offsetof(Vector3, x)); TESTER_CHECK(string_literal(field_x.type->name) == "f32"); TESTER_CHECK(field_x.type->kind == TYPE_KIND_F32); - TESTER_CHECK(field_x.type->size == sizeof(f32)); - TESTER_CHECK(field_x.type->align == alignof(f32)); + TESTER_CHECK(field_x.type->size == sizeof(F32)); + 
TESTER_CHECK(field_x.type->align == alignof(F32)); auto field_y = vec3_type->as_struct.fields[1]; TESTER_CHECK(string_literal(field_y.name) == "y"); TESTER_CHECK(field_y.offset == offsetof(Vector3, y)); TESTER_CHECK(string_literal(field_y.type->name) == "f32"); TESTER_CHECK(field_y.type->kind == TYPE_KIND_F32); - TESTER_CHECK(field_y.type->size == sizeof(f32)); - TESTER_CHECK(field_y.type->align == alignof(f32)); + TESTER_CHECK(field_y.type->size == sizeof(F32)); + TESTER_CHECK(field_y.type->align == alignof(F32)); auto field_z = vec3_type->as_struct.fields[2]; TESTER_CHECK(string_literal(field_z.name) == "z"); TESTER_CHECK(field_z.offset == offsetof(Vector3, z)); TESTER_CHECK(string_literal(field_z.type->name) == "f32"); TESTER_CHECK(field_z.type->kind == TYPE_KIND_F32); - TESTER_CHECK(field_z.type->size == sizeof(f32)); - TESTER_CHECK(field_z.type->align == alignof(f32)); + TESTER_CHECK(field_z.type->size == sizeof(F32)); + TESTER_CHECK(field_z.type->align == alignof(F32)); } // ("type_of array") @@ -486,24 +486,24 @@ TESTER_TEST("[CORE]: Reflect") TESTER_CHECK(field_x.offset == offsetof(Vector3, x)); TESTER_CHECK(string_literal(field_x.type->name) == "f32"); TESTER_CHECK(field_x.type->kind == TYPE_KIND_F32); - TESTER_CHECK(field_x.type->size == sizeof(f32)); - TESTER_CHECK(field_x.type->align == alignof(f32)); + TESTER_CHECK(field_x.type->size == sizeof(F32)); + TESTER_CHECK(field_x.type->align == alignof(F32)); auto field_y = vec3_type->as_struct.fields[1]; TESTER_CHECK(string_literal(field_y.name) == "y"); TESTER_CHECK(field_y.offset == offsetof(Vector3, y)); TESTER_CHECK(string_literal(field_y.type->name) == "f32"); TESTER_CHECK(field_y.type->kind == TYPE_KIND_F32); - TESTER_CHECK(field_y.type->size == sizeof(f32)); - TESTER_CHECK(field_y.type->align == alignof(f32)); + TESTER_CHECK(field_y.type->size == sizeof(F32)); + TESTER_CHECK(field_y.type->align == alignof(F32)); auto field_z = vec3_type->as_struct.fields[2]; TESTER_CHECK(string_literal(field_z.name) 
== "z"); TESTER_CHECK(field_z.offset == offsetof(Vector3, z)); TESTER_CHECK(string_literal(field_z.type->name) == "f32"); TESTER_CHECK(field_z.type->kind == TYPE_KIND_F32); - TESTER_CHECK(field_z.type->size == sizeof(f32)); - TESTER_CHECK(field_z.type->align == alignof(f32)); + TESTER_CHECK(field_z.type->size == sizeof(F32)); + TESTER_CHECK(field_z.type->align == alignof(F32)); } // ("type_of enum") @@ -517,9 +517,9 @@ TESTER_TEST("[CORE]: Reflect") TESTER_CHECK(reflect_enum_type->as_enum.values != nullptr); TESTER_CHECK(reflect_enum_type->as_enum.value_count == 7); - for (u64 i = 0; i < reflect_enum_type->as_enum.value_count; ++i) + for (U64 i = 0; i < reflect_enum_type->as_enum.value_count; ++i) { - TESTER_CHECK(reflect_enum_type->as_enum.values[i].index == (i32)i); + TESTER_CHECK(reflect_enum_type->as_enum.values[i].index == (I32)i); TESTER_CHECK(reflect_enum_type->as_enum.values[i].name == format("REFLECT_ENUM_{}", i, memory::temp_allocator())); } @@ -614,61 +614,61 @@ TESTER_TEST("[CORE]: Reflect") TESTER_CHECK(field_x.offset == offsetof(Vector3, x)); TESTER_CHECK(string_literal(field_x.type->name) == "f32"); TESTER_CHECK(field_x.type->kind == TYPE_KIND_F32); - TESTER_CHECK(field_x.type->size == sizeof(f32)); - TESTER_CHECK(field_x.type->align == alignof(f32)); + TESTER_CHECK(field_x.type->size == sizeof(F32)); + TESTER_CHECK(field_x.type->align == alignof(F32)); auto field_y = vec3_type->as_struct.fields[1]; TESTER_CHECK(string_literal(field_y.name) == "y"); TESTER_CHECK(field_y.offset == offsetof(Vector3, y)); TESTER_CHECK(string_literal(field_y.type->name) == "f32"); TESTER_CHECK(field_y.type->kind == TYPE_KIND_F32); - TESTER_CHECK(field_y.type->size == sizeof(f32)); - TESTER_CHECK(field_y.type->align == alignof(f32)); + TESTER_CHECK(field_y.type->size == sizeof(F32)); + TESTER_CHECK(field_y.type->align == alignof(F32)); auto field_z = vec3_type->as_struct.fields[2]; TESTER_CHECK(string_literal(field_z.name) == "z"); TESTER_CHECK(field_z.offset == 
offsetof(Vector3, z)); TESTER_CHECK(string_literal(field_z.type->name) == "f32"); TESTER_CHECK(field_z.type->kind == TYPE_KIND_F32); - TESTER_CHECK(field_z.type->size == sizeof(f32)); - TESTER_CHECK(field_z.type->align == alignof(f32)); + TESTER_CHECK(field_z.type->size == sizeof(F32)); + TESTER_CHECK(field_z.type->align == alignof(F32)); } // ("type_of template struct") { - TESTER_CHECK(type_of>() != nullptr); + TESTER_CHECK(type_of>() != nullptr); TESTER_CHECK(type_of>() != nullptr); TESTER_CHECK(type_of>>() != nullptr); - const Type *point_i32_type = type_of(Point{1, 2, 3}); - TESTER_CHECK(point_i32_type == type_of>()); + const Type *point_i32_type = type_of(Point{1, 2, 3}); + TESTER_CHECK(point_i32_type == type_of>()); TESTER_CHECK(string_literal(point_i32_type->name) == "Point"); TESTER_CHECK(point_i32_type->kind == TYPE_KIND_STRUCT); - TESTER_CHECK(point_i32_type->size == sizeof(Point)); - TESTER_CHECK(point_i32_type->align == alignof(Point)); + TESTER_CHECK(point_i32_type->size == sizeof(Point)); + TESTER_CHECK(point_i32_type->align == alignof(Point)); TESTER_CHECK(point_i32_type->as_struct.fields != nullptr); TESTER_CHECK(point_i32_type->as_struct.field_count == 3); - Foo_Struct foo = {1.5f, 1}; - auto foo_f32_i32_type = type_of>(); + Foo_Struct foo = {1.5f, 1}; + auto foo_f32_i32_type = type_of>(); TESTER_CHECK(foo_f32_i32_type == type_of(foo)); - auto foo_point_vector3_type = type_of, Vector3>>(); + auto foo_point_vector3_type = type_of, Vector3>>(); TESTER_CHECK(string_literal(foo_point_vector3_type->name) == "Foo_Struct,Vector3>"); TESTER_CHECK(foo_point_vector3_type->kind == TYPE_KIND_STRUCT); - TESTER_CHECK(foo_point_vector3_type->size == sizeof(Foo_Struct, Vector3>)); - TESTER_CHECK(foo_point_vector3_type->align == alignof(Foo_Struct, Vector3>)); + TESTER_CHECK(foo_point_vector3_type->size == sizeof(Foo_Struct, Vector3>)); + TESTER_CHECK(foo_point_vector3_type->align == alignof(Foo_Struct, Vector3>)); 
TESTER_CHECK(foo_point_vector3_type->as_struct.fields != nullptr); TESTER_CHECK(foo_point_vector3_type->as_struct.field_count == 2); - using foo_point_vector3_templated_type = Foo_Struct, Vector3>; + using foo_point_vector3_templated_type = Foo_Struct, Vector3>; auto foo_point_vector3_field_x = foo_point_vector3_type->as_struct.fields[0]; TESTER_CHECK(string_literal(foo_point_vector3_field_x.name) == "x"); TESTER_CHECK(foo_point_vector3_field_x.offset == offsetof(foo_point_vector3_templated_type, x)); TESTER_CHECK(string_literal(foo_point_vector3_field_x.type->name) == "Point"); TESTER_CHECK(foo_point_vector3_field_x.type->kind == TYPE_KIND_STRUCT); - TESTER_CHECK(foo_point_vector3_field_x.type->size == sizeof(Point)); - TESTER_CHECK(foo_point_vector3_field_x.type->align == alignof(Point)); + TESTER_CHECK(foo_point_vector3_field_x.type->size == sizeof(Point)); + TESTER_CHECK(foo_point_vector3_field_x.type->align == alignof(Point)); TESTER_CHECK(foo_point_vector3_field_x.type->as_struct.fields != nullptr); TESTER_CHECK(foo_point_vector3_field_x.type->as_struct.field_count == 3); @@ -685,8 +685,8 @@ TESTER_TEST("[CORE]: Reflect") // ("type_of template class") { - Foo_Class foo_class = {}; - auto foo_class_i32_type = type_of>(); + Foo_Class foo_class = {}; + auto foo_class_i32_type = type_of>(); TESTER_CHECK(foo_class_i32_type == type_of(foo_class)); TESTER_CHECK(foo_class_i32_type->as_struct.field_count == 3); TESTER_CHECK(foo_class_i32_type->as_struct.fields[2].name == string_literal("z")); @@ -694,27 +694,27 @@ TESTER_TEST("[CORE]: Reflect") TESTER_CHECK(foo_class_i32_type->as_struct.fields[2].tag == string_literal("")); TESTER_CHECK(foo_class_i32_type->as_struct.fields[2].type->name == string_literal("i32")); TESTER_CHECK(foo_class_i32_type->as_struct.fields[2].type->kind == TYPE_KIND_I32); - TESTER_CHECK(foo_class_i32_type->as_struct.fields[2].type->size == sizeof(i32)); - TESTER_CHECK(foo_class_i32_type->as_struct.fields[2].type->align == alignof(i32)); + 
TESTER_CHECK(foo_class_i32_type->as_struct.fields[2].type->size == sizeof(I32)); + TESTER_CHECK(foo_class_i32_type->as_struct.fields[2].type->align == alignof(I32)); } // ("type_of containers") { - auto array_u8_type = type_of>(); - TESTER_CHECK(array_u8_type == type_of(Array{})); + auto array_u8_type = type_of>(); + TESTER_CHECK(array_u8_type == type_of(Array{})); auto string_type = type_of(); TESTER_CHECK(string_type == type_of(String{})); - auto hash_table_i32_string_type = type_of>(); - TESTER_CHECK(hash_table_i32_string_type == type_of(Hash_Table{})); + auto hash_table_i32_string_type = type_of>(); + TESTER_CHECK(hash_table_i32_string_type == type_of(Hash_Table{})); } // ("value_of(T)") { - i32 v = 1; + I32 v = 1; auto i32_value = value_of(v); - TESTER_CHECK(*(i32 *)i32_value.data == *(i32 *)value_of((i32)1).data); - TESTER_CHECK(i32_value.type == value_of((i32)1).type); + TESTER_CHECK(*(I32 *)i32_value.data == *(I32 *)value_of((I32)1).data); + TESTER_CHECK(i32_value.type == value_of((I32)1).type); } } \ No newline at end of file diff --git a/unittest/src/unittest_serializer.cpp b/unittest/src/unittest_serializer.cpp index 7622658c..6acd0fb0 100644 --- a/unittest/src/unittest_serializer.cpp +++ b/unittest/src/unittest_serializer.cpp @@ -4,24 +4,24 @@ struct Game { - u64 a; - u64 b; - f32 c; + U64 a; + U64 b; + F32 c; char d; - Array e; + Array e; String f; - Hash_Table g; - i32 *h; + Hash_Table g; + I32 *h; }; inline static Game game_init(memory::Allocator *allocator = memory::heap_allocator()) { Game self = {}; - self.e = array_init(allocator); + self.e = array_init(allocator); self.f = string_init(allocator); - self.g = hash_table_init(allocator); - self.h = memory::allocate(allocator); + self.g = hash_table_init(allocator); + self.h = memory::allocate(allocator); return self; } @@ -58,16 +58,16 @@ TESTER_TEST("[CORE]: Binary_Serializer") Binary_Serializer serializer = binary_serializer_init(); DEFER(binary_serializer_deinit(serializer)); - i8 a1 = 1; - i16 
b1 = 2; - i32 c1 = 3; - i64 d1 = 4; - u8 e1 = 5; - u16 f1 = 6; - u32 g1 = 7; - u64 h1 = 8; - f32 i1 = 9; - f64 j1 = 10; + I8 a1 = 1; + I16 b1 = 2; + I32 c1 = 3; + I64 d1 = 4; + U8 e1 = 5; + U16 f1 = 6; + U32 g1 = 7; + U64 h1 = 8; + F32 i1 = 9; + F64 j1 = 10; char k1 = 'A'; bool l1 = true; @@ -87,16 +87,16 @@ TESTER_TEST("[CORE]: Binary_Serializer") Binary_Deserializer deserializer = binary_deserializer_init(serializer.buffer); DEFER(binary_deserializer_deinit(deserializer)); - i8 a2 = 0; - i16 b2 = 0; - i32 c2 = 0; - i64 d2 = 0; - u8 e2 = 0; - u16 f2 = 0; - u32 g2 = 0; - u64 h2 = 0; - f32 i2 = 0; - f64 j2 = 0; + I8 a2 = 0; + I16 b2 = 0; + I32 c2 = 0; + I64 d2 = 0; + U8 e2 = 0; + U16 f2 = 0; + U32 g2 = 0; + U64 h2 = 0; + F32 i2 = 0; + F64 j2 = 0; char k2 = 0; bool l2 = 0; @@ -132,16 +132,16 @@ TESTER_TEST("[CORE]: Binary_Serializer") Binary_Serializer serializer = binary_serializer_init(); DEFER(binary_serializer_deinit(serializer)); - i32 i1 = 5; - i32 *a1 = &i1; + I32 i1 = 5; + I32 *a1 = &i1; serialize(serializer, {"a1", a1}); Binary_Deserializer deserializer = binary_deserializer_init(serializer.buffer); DEFER(binary_deserializer_deinit(deserializer)); - i32 i2 = 0; - i32 *a2 = &i2; + I32 i2 = 0; + I32 *a2 = &i2; serialize(deserializer, {"a1", a2}); @@ -153,15 +153,15 @@ TESTER_TEST("[CORE]: Binary_Serializer") Binary_Serializer serializer = binary_serializer_init(); DEFER(binary_serializer_deinit(serializer)); - i32 a1[5] = {1, 2, 3, 4, 5}; - Array b1 = array_init_from({1, 2, 3, 4, 5}); + I32 a1[5] = {1, 2, 3, 4, 5}; + Array b1 = array_init_from({1, 2, 3, 4, 5}); DEFER(array_deinit(b1)); serialize(serializer, {"a1", a1}); serialize(serializer, {"b1", b1}); - i32 a2[5] = {}; - Array b2 = {}; + I32 a2[5] = {}; + Array b2 = {}; DEFER(array_deinit(b2)); Binary_Deserializer deserializer = binary_deserializer_init(serializer.buffer); @@ -170,7 +170,7 @@ TESTER_TEST("[CORE]: Binary_Serializer") serialize(deserializer, {"a1", a2}); serialize(deserializer, {"b1", b2}); - 
for (u64 i = 0; i < count_of(a1); ++i) + for (U64 i = 0; i < count_of(a1); ++i) { TESTER_CHECK(a1[i] == a2[i]); TESTER_CHECK(b1[i] == b2[i]); @@ -211,7 +211,7 @@ TESTER_TEST("[CORE]: Binary_Serializer") Binary_Serializer serializer = binary_serializer_init(); DEFER(binary_serializer_deinit(serializer)); - Hash_Table a1 = hash_table_init_from({ + Hash_Table a1 = hash_table_init_from({ {1, string_literal("A")}, {2, string_literal("B")}, {3, string_literal("C")}, @@ -220,7 +220,7 @@ TESTER_TEST("[CORE]: Binary_Serializer") serialize(serializer, {"a1", a1}); - Hash_Table a2 = {}; + Hash_Table a2 = {}; DEFER(destroy(a2)); Binary_Deserializer deserializer = binary_deserializer_init(serializer.buffer); @@ -231,7 +231,7 @@ TESTER_TEST("[CORE]: Binary_Serializer") TESTER_CHECK(a1.count == a2.count); TESTER_CHECK(a1.capacity == a2.capacity); - for (u64 i = 0; i < a1.entries.count; ++i) + for (U64 i = 0; i < a1.entries.count; ++i) { TESTER_CHECK(a1.entries[i].key == a2.entries[i].key); TESTER_CHECK(a1.entries[i].value == a2.entries[i].value); @@ -243,7 +243,7 @@ TESTER_TEST("[CORE]: Binary_Serializer") Binary_Serializer serializer = binary_serializer_init(); DEFER(binary_serializer_deinit(serializer)); - i32 i = 5; + I32 i = 5; Block a1 = {&i, sizeof(i)}; @@ -257,7 +257,7 @@ TESTER_TEST("[CORE]: Binary_Serializer") serialize(deserializer, {"a1", a2}); - TESTER_CHECK(*((i32 *)a1.data) == *((i32 *)a2.data)); + TESTER_CHECK(*((I32 *)a1.data) == *((I32 *)a2.data)); TESTER_CHECK(a1.size == a2.size); } @@ -297,7 +297,7 @@ TESTER_TEST("[CORE]: Binary_Serializer") TESTER_CHECK(new_game.c == original_game.c); TESTER_CHECK(new_game.d == original_game.d); - for (u64 i = 0; i < new_game.e.count; ++i) + for (U64 i = 0; i < new_game.e.count; ++i) TESTER_CHECK(new_game.e[i] == original_game.e[i]); TESTER_CHECK(new_game.f == original_game.f); @@ -348,7 +348,7 @@ TESTER_TEST("[CORE]: Binary_Serializer") TESTER_CHECK(new_game.c == original_game.c); TESTER_CHECK(new_game.d == original_game.d); 
- for (u64 i = 0; i < new_game.e.count; ++i) + for (U64 i = 0; i < new_game.e.count; ++i) TESTER_CHECK(new_game.e[i] == original_game.e[i]); TESTER_CHECK(new_game.f == original_game.f); @@ -368,7 +368,7 @@ TESTER_TEST("[CORE]: Binary_Serializer") Binary_Serializer serializer = binary_serializer_init(); DEFER(binary_serializer_deinit(serializer)); - i32 a1 = 1; + I32 a1 = 1; Error error1 = serialize(serializer, a1); TESTER_CHECK(error1 == true); TESTER_CHECK(error1.message == "[SERIALIZER][BINARY]: Please use Serialize_Pair, for e.x 'serialize(serializer, {\"a\", a})'."); @@ -376,7 +376,7 @@ TESTER_TEST("[CORE]: Binary_Serializer") Binary_Deserializer deserializer = binary_deserializer_init(serializer.buffer); DEFER(binary_deserializer_deinit(deserializer)); - i32 a2 = 0; + I32 a2 = 0; Error error2 = serialize(deserializer, a2); TESTER_CHECK(error2 == true); TESTER_CHECK(error2.message == "[DESERIALIZER][BINARY]: Please use Serialize_Pair, for e.x 'serialize(deserializer, {\"a\", a})'."); @@ -392,16 +392,16 @@ TESTER_TEST("[CORE]: JSON_Serializer") Json_Serializer serializer = json_serializer_init(); DEFER(json_serializer_deinit(serializer)); - i8 a1 = 1; - i16 b1 = 2; - i32 c1 = 3; - i64 d1 = 4; - u8 e1 = 5; - u16 f1 = 6; - u32 g1 = 7; - u64 h1 = 8; - f32 i1 = 9; - f64 j1 = 10; + I8 a1 = 1; + I16 b1 = 2; + I32 c1 = 3; + I64 d1 = 4; + U8 e1 = 5; + U16 f1 = 6; + U32 g1 = 7; + U64 h1 = 8; + F32 i1 = 9; + F64 j1 = 10; char k1 = 'A'; bool l1 = true; @@ -418,16 +418,16 @@ TESTER_TEST("[CORE]: JSON_Serializer") serialize(serializer, {"k1", k1}); serialize(serializer, {"l1", l1}); - i8 a2 = 0; - i16 b2 = 0; - i32 c2 = 0; - i64 d2 = 0; - u8 e2 = 0; - u16 f2 = 0; - u32 g2 = 0; - u64 h2 = 0; - f32 i2 = 0; - f64 j2 = 0; + I8 a2 = 0; + I16 b2 = 0; + I32 c2 = 0; + I64 d2 = 0; + U8 e2 = 0; + U16 f2 = 0; + U32 g2 = 0; + U64 h2 = 0; + F32 i2 = 0; + F64 j2 = 0; char k2 = 0; bool l2 = 0; @@ -466,16 +466,16 @@ TESTER_TEST("[CORE]: JSON_Serializer") Json_Serializer serializer = 
json_serializer_init(); DEFER(json_serializer_deinit(serializer)); - i32 i1 = 5; - i32 *a1 = &i1; + I32 i1 = 5; + I32 *a1 = &i1; serialize(serializer, {"a1", a1}); Json_Deserializer deserializer = json_deserializer_init(serializer.values[0]); DEFER(json_deserializer_deinit(deserializer)); - i32 i2 = 0; - i32 *a2 = &i2; + I32 i2 = 0; + I32 *a2 = &i2; serialize(deserializer, {"a1", a2}); @@ -488,8 +488,8 @@ TESTER_TEST("[CORE]: JSON_Serializer") Json_Serializer serializer = json_serializer_init(); DEFER(json_serializer_deinit(serializer)); - i32 a1[3] = {1, 2, 3}; - Array b1 = array_init_from({1, 2, 3}); + I32 a1[3] = {1, 2, 3}; + Array b1 = array_init_from({1, 2, 3}); DEFER(array_deinit(b1)); serialize(serializer, {"a1", a1}); @@ -498,14 +498,14 @@ TESTER_TEST("[CORE]: JSON_Serializer") Json_Deserializer deserializer = json_deserializer_init(serializer.values[0]); DEFER(json_deserializer_deinit(deserializer)); - i32 a2[3] = {}; - Array b2 = {}; + I32 a2[3] = {}; + Array b2 = {}; DEFER(array_deinit(b2)); serialize(deserializer, {"a1", a2}); serialize(deserializer, {"b1", b2}); - for (u64 i = 0; i < count_of(a1); ++i) + for (U64 i = 0; i < count_of(a1); ++i) { TESTER_CHECK(a1[i] == a2[i]); TESTER_CHECK(b1[i] == b2[i]); @@ -546,7 +546,7 @@ TESTER_TEST("[CORE]: JSON_Serializer") Json_Serializer serializer = json_serializer_init(); DEFER(json_serializer_deinit(serializer)); - Hash_Table a1 = hash_table_init_from({ + Hash_Table a1 = hash_table_init_from({ {1, string_literal("A")}, {2, string_literal("B")}, {3, string_literal("C")}, @@ -558,7 +558,7 @@ TESTER_TEST("[CORE]: JSON_Serializer") Json_Deserializer deserializer = json_deserializer_init(serializer.values[0]); DEFER(json_deserializer_deinit(deserializer)); - Hash_Table a2 = {}; + Hash_Table a2 = {}; DEFER(destroy(a2)); serialize(deserializer, {"a1", a2}); @@ -566,7 +566,7 @@ TESTER_TEST("[CORE]: JSON_Serializer") TESTER_CHECK(a1.count == a2.count); TESTER_CHECK(a1.capacity == a2.capacity); - for (u64 i = 0; i < 
a1.entries.count; ++i) + for (U64 i = 0; i < a1.entries.count; ++i) { TESTER_CHECK(a1.entries[i].key == a2.entries[i].key); TESTER_CHECK(a1.entries[i].value == a2.entries[i].value); @@ -578,7 +578,7 @@ TESTER_TEST("[CORE]: JSON_Serializer") Json_Serializer serializer = json_serializer_init(); DEFER(json_serializer_deinit(serializer)); - i32 i = 5; + I32 i = 5; Block a1 = {&i, sizeof(i)}; @@ -592,7 +592,7 @@ TESTER_TEST("[CORE]: JSON_Serializer") serialize(deserializer, {"a1", a2}); - TESTER_CHECK(*((i32 *)a1.data) == *((i32 *)a2.data)); + TESTER_CHECK(*((I32 *)a1.data) == *((I32 *)a2.data)); TESTER_CHECK(a1.size == a2.size); } @@ -632,7 +632,7 @@ TESTER_TEST("[CORE]: JSON_Serializer") TESTER_CHECK(new_game.c == original_game.c); TESTER_CHECK(new_game.d == original_game.d); - for (u64 i = 0; i < new_game.e.count; ++i) + for (U64 i = 0; i < new_game.e.count; ++i) TESTER_CHECK(new_game.e[i] == original_game.e[i]); TESTER_CHECK(new_game.f == original_game.f); @@ -683,13 +683,13 @@ TESTER_TEST("[CORE]: JSON_Serializer") { // ("Fundamental types") { - i32 a1 = 1; + I32 a1 = 1; auto [buffer, error] = to_json(a1); DEFER(string_deinit(buffer)); TESTER_CHECK(buffer == "{\n\t\"data\": 1\n}"); - i32 a2 = 0; + I32 a2 = 0; from_json(buffer, a2); TESTER_CHECK(a1 == a2); @@ -728,7 +728,7 @@ TESTER_TEST("[CORE]: JSON_Serializer") TESTER_CHECK(new_game.c == original_game.c); TESTER_CHECK(new_game.d == original_game.d); - for (u64 i = 0; i < new_game.e.count; ++i) + for (U64 i = 0; i < new_game.e.count; ++i) TESTER_CHECK(new_game.e[i] == original_game.e[i]); TESTER_CHECK(new_game.f == original_game.f); @@ -749,7 +749,7 @@ TESTER_TEST("[CORE]: JSON_Serializer") Json_Serializer serializer = json_serializer_init(); DEFER(json_serializer_deinit(serializer)); - i32 a1 = 1; + I32 a1 = 1; Error error1 = serialize(serializer, a1); TESTER_CHECK(error1 == true); TESTER_CHECK(error1.message == "[SERIALIZER][JSON]: Please use Serialize_Pair, for e.x 'serialize(serializer, {\"a\", a})'."); @@ 
-757,7 +757,7 @@ TESTER_TEST("[CORE]: JSON_Serializer") Json_Deserializer deserializer = json_deserializer_init(serializer.values[0]); DEFER(json_deserializer_deinit(deserializer)); - i32 a2 = 0; + I32 a2 = 0; Error error2 = serialize(deserializer, a2); TESTER_CHECK(error2 == true); TESTER_CHECK(error2.message == "[DESERIALIZER][JSON]: Please use Serialize_Pair, for e.x 'serialize(deserializer, {\"a\", a})'.");