diff --git a/.github/workflows/broker-check.yml b/.github/workflows/broker-check.yml index 120233b1..39e8fecf 100644 --- a/.github/workflows/broker-check.yml +++ b/.github/workflows/broker-check.yml @@ -63,6 +63,27 @@ jobs: cflags: "" wolfmqtt_opts: "--enable-v5 --enable-broker --enable-max-qos=0" skip_broker_test: "yes" + - name: "Broker v5 (ordering / Receive Maximum)" + cflags: "" + wolfmqtt_opts: "--enable-broker --enable-v5" + - name: "Broker v5 strict-serial (inflight=1)" + cflags: "-DBROKER_MAX_INFLIGHT_PER_SUB=1" + wolfmqtt_opts: "--enable-broker --enable-v5" + - name: "Broker v5 static memory" + cflags: "-DWOLFMQTT_STATIC_MEMORY" + wolfmqtt_opts: "--enable-broker --enable-v5" + - name: "Broker with persist" + cflags: "" + wolfmqtt_opts: "--enable-broker --enable-v5 --enable-broker-persist" + - name: "Broker with persist + TLS" + cflags: "" + wolfmqtt_opts: "--enable-broker --enable-v5 --enable-broker-persist --enable-tls" + - name: "Broker with persist + AES-GCM encryption" + cflags: "" + wolfmqtt_opts: "--enable-broker --enable-v5 --enable-broker-persist --enable-broker-persist-encrypt" + - name: "Broker with persist + static memory" + cflags: "-DWOLFMQTT_STATIC_MEMORY" + wolfmqtt_opts: "--enable-broker --enable-v5 --enable-broker-persist" steps: - name: Install dependencies diff --git a/.github/workflows/macos-check.yml b/.github/workflows/macos-check.yml index 3666f8cb..1675ad27 100644 --- a/.github/workflows/macos-check.yml +++ b/.github/workflows/macos-check.yml @@ -84,5 +84,36 @@ jobs: - name: Show logs on failure if: failure() || cancelled() run: | - cat test-suite.log - cat scripts/*.log + # Copy broker.test tmp dirs ($TMPDIR/tmp.* on macOS, e.g. + # /var/folders/.../T/tmp.XXXXXX) into the workspace so the + # next step can upload them as an artifact. Globbing the + # real /var/folders path directly trips over unreadable + # macOS LaunchServices files. 
+ mkdir -p ci-logs + [ -f test-suite.log ] && cp test-suite.log ci-logs/ || true + cp scripts/*.log ci-logs/ 2>/dev/null || true + for d in "${TMPDIR%/}"/tmp.* /tmp/tmp.*; do + [ -d "$d" ] || continue + base=$(basename "$d") + mkdir -p "ci-logs/$base" + cp "$d"/*.log "ci-logs/$base/" 2>/dev/null || true + done + echo "=== test-suite.log ===" + cat test-suite.log 2>/dev/null || true + echo "=== scripts/*.log ===" + cat scripts/*.log 2>/dev/null || true + echo "=== ci-logs/tmp.*/*.log (broker.test per-test logs) ===" + for f in ci-logs/tmp.*/*.log; do + [ -f "$f" ] || continue + echo "--- $f ---" + cat "$f" + done + + - name: Upload broker.test logs on failure + if: failure() || cancelled() + uses: actions/upload-artifact@v4 + with: + name: broker-test-logs-macos + path: ci-logs/ + if-no-files-found: ignore + retention-days: 7 diff --git a/configure.ac b/configure.ac index d56a74b5..ac62a8ed 100644 --- a/configure.ac +++ b/configure.ac @@ -498,6 +498,58 @@ then AM_CFLAGS="$AM_CFLAGS -DWOLFMQTT_BROKER_NO_INSECURE" fi +# Broker persistent storage (sessions, subs, retained, offline queue). +# Opt-in; off by default. Adds the hook-based persistence layer plus a +# default POSIX backend. +AC_ARG_ENABLE([broker-persist], +[AS_HELP_STRING([--enable-broker-persist],[Enable broker persistent storage via callback hooks (default: disabled)])], +[ ENABLED_BROKER_PERSIST=$enableval ], +[ ENABLED_BROKER_PERSIST=no ] +) +if test "x$ENABLED_BROKER_PERSIST" = "xyes" +then + if test "x$ENABLED_BROKER" != "xyes" + then + AC_MSG_ERROR([--enable-broker-persist requires --enable-broker]) + fi +AM_CFLAGS="$AM_CFLAGS -DWOLFMQTT_BROKER_PERSIST" +fi + +# Optional encryption-at-rest for persisted records using wolfCrypt AES-GCM. +# Requires --enable-broker-persist; off by default. 
+AC_ARG_ENABLE([broker-persist-encrypt], +[AS_HELP_STRING([--enable-broker-persist-encrypt],[Encrypt persisted records with AES-GCM (default: disabled, requires --enable-broker-persist)])], +[ ENABLED_BROKER_PERSIST_ENCRYPT=$enableval ], +[ ENABLED_BROKER_PERSIST_ENCRYPT=no ] +) +if test "x$ENABLED_BROKER_PERSIST_ENCRYPT" = "xyes" +then + if test "x$ENABLED_BROKER_PERSIST" != "xyes" + then + AC_MSG_ERROR([--enable-broker-persist-encrypt requires --enable-broker-persist]) + fi +AM_CFLAGS="$AM_CFLAGS -DWOLFMQTT_BROKER_PERSIST_ENCRYPT" +fi + +# Development-only fixed-pattern derive_key hook for the CLI broker. +# Off by default; required to use "-E dev" on encrypt builds. Never +# enable in a production build - the resulting binary contains a +# trivially-recoverable AES-GCM key generator that any flip of the +# CLI argument would substitute for real key management. +AC_ARG_ENABLE([broker-persist-encrypt-dev-key], +[AS_HELP_STRING([--enable-broker-persist-encrypt-dev-key],[Link the CLI broker's fixed-pattern "dev" derive_key hook (default: disabled; requires --enable-broker-persist-encrypt; NOT FOR PRODUCTION)])], +[ ENABLED_BROKER_PERSIST_ENCRYPT_DEV_KEY=$enableval ], +[ ENABLED_BROKER_PERSIST_ENCRYPT_DEV_KEY=no ] +) +if test "x$ENABLED_BROKER_PERSIST_ENCRYPT_DEV_KEY" = "xyes" +then + if test "x$ENABLED_BROKER_PERSIST_ENCRYPT" != "xyes" + then + AC_MSG_ERROR([--enable-broker-persist-encrypt-dev-key requires --enable-broker-persist-encrypt]) + fi +AM_CFLAGS="$AM_CFLAGS -DWOLFMQTT_BROKER_PERSIST_ENCRYPT_DEV_KEY" +fi + AM_CONDITIONAL([HAVE_LIBWOLFSSL], [test "x$ENABLED_TLS" = "xyes"]) AM_CONDITIONAL([HAVE_LIBCURL], [test "x$ENABLED_CURL" = "xyes"]) diff --git a/examples/pub-sub/mqtt-sub.c b/examples/pub-sub/mqtt-sub.c index 5caaf1f3..c2905a1a 100644 --- a/examples/pub-sub/mqtt-sub.c +++ b/examples/pub-sub/mqtt-sub.c @@ -527,6 +527,13 @@ int sub_client(MQTTCtx *mqttCtx) break; } } + #ifdef WOLFMQTT_NONBLOCK + else if (rc == MQTT_CODE_CONTINUE) { + /* Non-blocking: no 
data yet, keep polling. mqtt_check_timeout() + * above will convert this to MQTT_CODE_ERROR_TIMEOUT after the + * inactivity window, which drives the keep-alive ping branch. */ + } + #endif else if (rc != MQTT_CODE_SUCCESS) { /* There was an error */ PRINTF("MQTT Message Wait: %s (%d)", diff --git a/scripts/broker.test b/scripts/broker.test index 62c51e57..80c7a9e9 100755 --- a/scripts/broker.test +++ b/scripts/broker.test @@ -87,8 +87,6 @@ start_broker() { wait $broker_pid 2>/dev/null || true broker_pid=$no_pid fi - generate_port - local tls_args="" local has_tls=0 local has_s_port=0 for arg in "$@"; do @@ -98,13 +96,33 @@ start_broker() { has_s_port=1 fi done - if [ "$has_tls" -eq 1 ] && [ "$has_s_port" -eq 0 ]; then - tls_args="-s $port" - fi - broker_log="${TMP_DIR}/broker_p${port}.log" - ./$broker_bin "$@" -p $port $tls_args >"$broker_log" 2>&1 & - broker_pid=$! - check_broker + # Retry a few times to absorb random-port collisions with + # unrelated processes on this host (the broker binds without + # SO_REUSEADDR, so an earlier broker's socket still in TIME_WAIT on + # the same port also lands us here). + local attempt=0 + while [ $attempt -lt 5 ]; do + generate_port + local tls_args="" + if [ "$has_tls" -eq 1 ] && [ "$has_s_port" -eq 0 ]; then + tls_args="-s $port" + fi + broker_log="${TMP_DIR}/broker_p${port}.log" + # -v 3 = DEBUG; emits PUBLISH/PUBACK/drain events so the + # preserved broker_p${port}.log makes broker-side failures + # actually diagnosable in CI. + ./$broker_bin "$@" -v 3 -p $port $tls_args >"$broker_log" 2>&1 & + broker_pid=$! + if check_broker; then + return 0 + fi + kill $broker_pid 2>/dev/null + wait $broker_pid 2>/dev/null || true + broker_pid=$no_pid + attempt=$((attempt + 1)) + done + echo "WARNING: broker failed to start after 5 attempts" >&2 + return 1 } # Start broker with dual ports (plain + TLS). 
@@ -121,7 +139,7 @@ start_broker_dual() { port_tls=$port port=$plain_port broker_log="${TMP_DIR}/broker_dual_p${port}_t${port_tls}.log" - ./$broker_bin "$@" -p $port -s $port_tls >"$broker_log" 2>&1 & + ./$broker_bin "$@" -v 3 -p $port -s $port_tls >"$broker_log" 2>&1 & broker_pid=$! check_broker $port check_broker $port_tls @@ -141,12 +159,35 @@ has_will=no has_wildcards=no has_insecure=no has_tls=no +has_persist=no echo "$broker_features" | grep -q "auth" && has_auth=yes echo "$broker_features" | grep -q "retained" && has_retained=yes echo "$broker_features" | grep -q " will" && has_will=yes echo "$broker_features" | grep -q "wildcards" && has_wildcards=yes echo "$broker_features" | grep -q "insecure" && has_insecure=yes echo "$broker_features" | grep -q "tls" && has_tls=yes +echo "$broker_features" | grep -q " persist" && has_persist=yes +has_persist_encrypt=no +echo "$broker_features" | grep -q " persist-encrypt" && \ + has_persist_encrypt=yes +has_persist_encrypt_dev_key=no +echo "$broker_features" | grep -q " persist-encrypt-dev-key" && \ + has_persist_encrypt_dev_key=yes +has_static_memory=no +echo "$broker_features" | grep -q " static-memory" && \ + has_static_memory=yes + +# Persist-encrypt builds refuse to start without an explicit key source. +# CLI tests opt into the development key with -E dev (NOT for production +# - the key is a fixed pattern in the binary, only linked into the +# binary when --enable-broker-persist-encrypt-dev-key was passed at +# configure time). For non-encrypt builds and encrypt builds without +# the dev-key hook this stays empty; the encrypt-CLI tests check the +# capability flag explicitly and SKIP when missing. +broker_dir_flags="" +if [ "$has_persist_encrypt_dev_key" = "yes" ]; then + broker_dir_flags="-E dev" +fi # Determine if plain (non-TLS) tests can run skip_plain=no @@ -604,12 +645,19 @@ if [ "$has_v5" = "yes" ]; then T12_RC=$? # 12b: Verify CONNACK server properties were received. 
- # Type 37 = Retain Available, Type 40 = Wildcard Subscription Available. - # Maximum QoS (Type 36) is intentionally omitted per [MQTT-3.2.2.3.4] - # (absence signals QoS 2 support; emitting Max QoS=2 is a Protocol Error). + # Type 37 = Retain Available, Type 40 = Wildcard Subscription Available, + # Type 33 = Receive Maximum. Type 36 (Maximum QoS) is intentionally NOT + # emitted on default builds - [MQTT-3.2.2.3.4] forbids values other + # than 0 or 1 there, and absence implies the broker supports QoS 2 + # (which it does). A regression that re-adds it on a full-QoS build + # would break strict v5 clients like mosquitto. T12_PROPS=yes grep -q "Property CB: Type 37" "${TMP_DIR}/t12.log" 2>/dev/null || T12_PROPS=no grep -q "Property CB: Type 40" "${TMP_DIR}/t12.log" 2>/dev/null || T12_PROPS=no + grep -q "Property CB: Type 33" "${TMP_DIR}/t12.log" 2>/dev/null || T12_PROPS=no + if grep -q "Property CB: Type 36" "${TMP_DIR}/t12.log" 2>/dev/null; then + T12_PROPS=no + fi # 12c: v5 pub/sub with separate clients (property forwarding) start_broker @@ -1083,6 +1131,486 @@ else fi fi +# --- Test 26: QoS 1 burst delivery preserves order [MQTT-4.6.0-3] --- +# Validates the per-subscriber outbound queue (BrokerHandle_Publish enqueue + +# BrokerClient_DrainOutQueue) introduced for #7 ordered delivery. A burst +# of 20 sequential QoS 1 publishes from one publisher to one subscriber +# must all arrive, and in publish order, regardless of the inflight cap. +echo "" +echo "--- Test 26: QoS 1 burst delivery (ordered) [MQTT-4.6.0-3] ---" +if [ "$skip_plain" = "yes" ]; then + echo "SKIP: Burst delivery (plain listener disabled)" +else +start_broker +T26_N=20 +rm -f "${TMP_DIR}/t26_sub.ready" +# Notes on subscriber flags: +# -C 5000 command timeout per wait, in milliseconds. NOT a message +# count - mqtt-sub uses -C as cmd_timeout_ms (mqttexample.c +# case 'C'). 
Earlier revisions of this test mistakenly +# passed -C $T26_N (=20), giving a 20 ms timeout that on +# nonblock builds (--enable-all enables WOLFMQTT_NONBLOCK) +# tripped MqttClient_Unsubscribe / Ping_ex before any +# output flushed, especially on macOS arm64. +# -k 300 keep-alive seconds; bumped above worst-case test duration +# so the client-side keep-alive does not fire mid-burst on +# slow CI runners. Inbound PUBLISH traffic does not reset +# the client keep-alive timer. +# Subscriber stays alive until the test kills it after the publishers +# finish; that pattern is below. +timeout 30 ./$sub_bin -T -h 127.0.0.1 -p $port -n "test/burst" -q 1 \ + -i "t26_sub" -C 5000 -k 300 -R "${TMP_DIR}/t26_sub.ready" \ + >"${TMP_DIR}/t26_sub.log" 2>&1 & +T26_SUB_PID=$! +TEST_PIDS+=($T26_SUB_PID) +wait_for_file "${TMP_DIR}/t26_sub.ready" 10 +# Publish T26_N sequentially numbered QoS 1 messages +for t26_i in $(seq 1 $T26_N); do + ./$pub_bin -T -h 127.0.0.1 -p $port -n "test/burst" -q 1 \ + -m "burst_${t26_i}" -i "t26_pub_${t26_i}" \ + >>"${TMP_DIR}/t26_pub.log" 2>&1 +done +# mqtt-sub has no "exit after N messages" mode, so it never exits on +# its own. Give the broker a grace window to fan out all 20 messages +# (each publisher is a separate connect/pub/disconnect; the broker's +# outbound drain may lag the last publish a bit on slow CI), then kill +# the subscriber so its log is flushed and we can count what arrived. +sleep 2 +kill $T26_SUB_PID 2>/dev/null +wait $T26_SUB_PID 2>/dev/null || true +TEST_PIDS=() +# Count how many of the burst_N tokens reached the subscriber and check +# that they arrived in publish order (1, 2, ..., N). 
+T26_RECEIVED=$(grep -oE 'burst_[0-9]+' "${TMP_DIR}/t26_sub.log" 2>/dev/null \ + | wc -l | tr -d ' ') +T26_ORDER_OK=yes +T26_LAST=0 +while read -r tok; do + n=${tok#burst_} + if [ "$n" -le "$T26_LAST" ]; then + T26_ORDER_OK=no + fi + T26_LAST=$n +done < <(grep -oE 'burst_[0-9]+' "${TMP_DIR}/t26_sub.log" 2>/dev/null) +if [ "$T26_RECEIVED" -eq "$T26_N" ] && [ "$T26_ORDER_OK" = "yes" ]; then + echo "PASS: QoS 1 burst delivery (received=$T26_RECEIVED order=ok)" +else + echo "FAIL: Burst delivery (received=$T26_RECEIVED expected=$T26_N " \ + "order=$T26_ORDER_OK)" + echo "--- t26 broker_log tail ---" + tail -80 "$broker_log" 2>/dev/null || true + echo "--- t26_sub.log tail ---" + tail -40 "${TMP_DIR}/t26_sub.log" 2>/dev/null || true + echo "--- t26_pub.log tail ---" + tail -40 "${TMP_DIR}/t26_pub.log" 2>/dev/null || true + echo "--- t26 end ---" + FAIL=1 +fi +fi # skip_plain + +# --- Test 27: Persist round-trip across broker restart --- +# Validates the --enable-broker-persist hooks: starting the broker with +# -D DIR, retaining a message, then restarting against the same +# directory must deliver the retained message to a new subscriber. +echo "" +echo "--- Test 27: Persist round-trip across restart ---" +if [ "$skip_plain" = "yes" ]; then + echo "SKIP: Persist round-trip (plain listener disabled)" +elif [ "$has_persist" = "no" ]; then + echo "SKIP: Persist round-trip (built without --enable-broker-persist)" +elif [ "$has_retained" = "no" ]; then + echo "SKIP: Persist round-trip (retained support not built)" +elif [ "$has_persist_encrypt" = "yes" ] && \ + [ "$has_persist_encrypt_dev_key" = "no" ]; then + echo "SKIP: Persist round-trip (encrypt build without dev-key CLI hook)" +else +T27_DIR="${TMP_DIR}/persist_t27" +mkdir -p "$T27_DIR" +# Stop any existing broker started by previous tests. 
+if [ $broker_pid != $no_pid ]; then + kill $broker_pid 2>/dev/null + wait $broker_pid 2>/dev/null || true + broker_pid=$no_pid +fi +generate_port +broker_log="${TMP_DIR}/t27_broker1.log" +./$broker_bin -p $port -D "$T27_DIR" $broker_dir_flags >"$broker_log" 2>&1 & +broker_pid=$! +check_broker +# Retain a message +./$pub_bin -T -h 127.0.0.1 -p $port -n "test/persist_t27" \ + -m "t27_payload" -r >"${TMP_DIR}/t27_pub.log" 2>&1 +sleep 0.2 +# Stop broker (graceful) +kill $broker_pid 2>/dev/null +wait $broker_pid 2>/dev/null || true +broker_pid=$no_pid +# Restart against same dir +broker_log="${TMP_DIR}/t27_broker2.log" +./$broker_bin -p $port -D "$T27_DIR" $broker_dir_flags >"$broker_log" 2>&1 & +broker_pid=$! +check_broker +# Confirm restore line printed (defense in depth: subscriber confirms anyway) +T27_RESTORED=no +grep -q "persist restore retained loaded=1" "$broker_log" 2>/dev/null \ + && T27_RESTORED=yes +# New subscriber should receive the retained payload +rm -f "${TMP_DIR}/t27_sub.ready" +timeout 5 ./$sub_bin -T -h 127.0.0.1 -p $port -n "test/persist_t27" \ + -i "t27_sub" -R "${TMP_DIR}/t27_sub.ready" \ + >"${TMP_DIR}/t27_sub.log" 2>&1 & +T27_SUB_PID=$! +TEST_PIDS+=($T27_SUB_PID) +wait_for_file "${TMP_DIR}/t27_sub.ready" 3 +sleep 0.3 +kill $T27_SUB_PID 2>/dev/null +wait $T27_SUB_PID 2>/dev/null || true +TEST_PIDS=() +T27_GOT=no +grep -q "t27_payload" "${TMP_DIR}/t27_sub.log" 2>/dev/null && T27_GOT=yes +# On encrypt builds broker_dir_flags includes -E dev, so T27 ends up +# exercising the encrypted-at-rest round-trip too (T31 still adds the +# specific no-plaintext-on-disk check). Annotate so the log is honest +# about what was covered. 
+T27_MODE="plaintext" +if [ "$has_persist_encrypt_dev_key" = "yes" ]; then + T27_MODE="encrypted via -E dev" +fi +if [ "$T27_GOT" = "yes" ] && [ "$T27_RESTORED" = "yes" ]; then + echo "PASS: Persist round-trip ($T27_MODE; restored=$T27_RESTORED delivered=$T27_GOT)" +else + echo "FAIL: Persist round-trip ($T27_MODE; restored=$T27_RESTORED delivered=$T27_GOT)" + FAIL=1 +fi +fi # has_persist + has_retained + +# --- Test 28: Persist schema-mismatch wipe-and-restart --- +# Corrupt a persisted record by stomping its schema_ver bytes. The +# broker must log "schema mismatch", overwrite META, and start cleanly. +# Skipped in encrypted-persist builds: an attacker-supplied plaintext +# bogus record gets rejected by the decrypt stage before the schema +# check, so the wipe path is exercised via the plaintext build matrix +# entry instead. +echo "" +echo "--- Test 28: Persist schema mismatch ---" +t28_persist_encrypt=no +echo "$broker_features" | grep -q " persist-encrypt" && \ + t28_persist_encrypt=yes +if [ "$skip_plain" = "yes" ]; then + echo "SKIP: Persist schema mismatch (plain listener disabled)" +elif [ "$has_persist" = "no" ]; then + echo "SKIP: Persist schema mismatch (built without --enable-broker-persist)" +elif [ "$t28_persist_encrypt" = "yes" ]; then + echo "SKIP: Persist schema mismatch (encrypted build - covered by plaintext matrix entry)" +else +T28_DIR="${TMP_DIR}/persist_t28" +mkdir -p "$T28_DIR/1" +# Write a fake META file with a bogus schema version. Magic "WMQB", +# then schema_ver = 0xFFFF (big endian), rec_kind = 1, body_len = 4, +# body = some bytes. +# Filename "00.bin" reflects the POSIX backend's key-to-filename +# encoding: META uses a single 0x00 key byte, which wmqb_hex_encode +# (in src/mqtt_broker_persist_posix.c) renders as two lowercase hex +# chars - "00". If that encoding ever changes, update this filename +# to match. 
+printf 'WMQB\xff\xff\x00\x01\x00\x00\x00\x04zzzz' > "$T28_DIR/1/00.bin" +if [ $broker_pid != $no_pid ]; then + kill $broker_pid 2>/dev/null + wait $broker_pid 2>/dev/null || true + broker_pid=$no_pid +fi +generate_port +broker_log="${TMP_DIR}/t28_broker.log" +./$broker_bin -p $port -D "$T28_DIR" $broker_dir_flags >"$broker_log" 2>&1 & +broker_pid=$! +check_broker +T28_WIPED=no +grep -q "schema mismatch" "$broker_log" 2>/dev/null && T28_WIPED=yes +# Broker should still be functional - run a basic pub/sub against it. +./$client_bin -T -h 127.0.0.1 -p $port -n "test/persist_t28" -C 3000 \ + >"${TMP_DIR}/t28_client.log" 2>&1 +T28_CLIENT_RC=$? +if [ "$T28_WIPED" = "yes" ] && [ $T28_CLIENT_RC -eq 0 ]; then + echo "PASS: Persist schema mismatch (wiped=$T28_WIPED rc=$T28_CLIENT_RC)" +else + echo "FAIL: Persist schema mismatch (wiped=$T28_WIPED rc=$T28_CLIENT_RC)" + FAIL=1 +fi +fi # has_persist + +# --- Test 29: Offline queue across reconnect (same broker) --- +# A persistent sub disconnects, publisher sends QoS 1 messages, sub +# reconnects and receives the queue. Exercises the orphan-session pool +# without going through a broker restart. 
+echo "" +echo "--- Test 29: Offline queue across reconnect ---" +if [ "$skip_plain" = "yes" ]; then + echo "SKIP: Offline queue (plain listener disabled)" +elif [ "$has_persist" = "no" ]; then + echo "SKIP: Offline queue (built without --enable-broker-persist)" +elif [ "$has_static_memory" = "yes" ]; then + echo "SKIP: Offline queue (orphan/outbound-queue is dynamic-memory only)" +elif [ "$has_persist_encrypt" = "yes" ] && \ + [ "$has_persist_encrypt_dev_key" = "no" ]; then + echo "SKIP: Offline queue (encrypt build without dev-key CLI hook)" +else +T29_DIR="${TMP_DIR}/persist_t29" +mkdir -p "$T29_DIR" +if [ $broker_pid != $no_pid ]; then + kill $broker_pid 2>/dev/null + wait $broker_pid 2>/dev/null || true + broker_pid=$no_pid +fi +generate_port +broker_log="${TMP_DIR}/t29_broker.log" +./$broker_bin -p $port -D "$T29_DIR" $broker_dir_flags >"$broker_log" 2>&1 & +broker_pid=$! +check_broker +# 1. Persistent sub connects, subscribes; SIGKILL to disconnect +# abruptly (graceful exit sends UNSUBSCRIBE which would tear down +# the subscription and defeat the orphan path). +rm -f "${TMP_DIR}/t29_first.ready" +./$sub_bin -T -h 127.0.0.1 -p $port -n "test/offlineq" -q 1 \ + -i "t29_sub" -s \ + -R "${TMP_DIR}/t29_first.ready" \ + >"${TMP_DIR}/t29_first.log" 2>&1 & +T29_FIRST_PID=$! +TEST_PIDS+=($T29_FIRST_PID) +wait_for_file "${TMP_DIR}/t29_first.ready" 5 +kill -9 $T29_FIRST_PID 2>/dev/null +wait $T29_FIRST_PID 2>/dev/null || true +TEST_PIDS=() +sleep 0.5 +# 2. Publish 3 QoS 1 messages while disconnected. +for t29_i in 1 2 3; do + ./$pub_bin -T -h 127.0.0.1 -p $port -n "test/offlineq" -q 1 \ + -m "off_${t29_i}" -i "t29_pub_${t29_i}" \ + >>"${TMP_DIR}/t29_pub.log" 2>&1 +done +sleep 0.5 +# 3. Reconnect with same client_id + clean_session=0; receive backlog. 
+# Use a generous cmd_timeout_ms (-C 5000) so the client's CONNACK wait +# survives the orphan-reassociate + reclaim path; 3 ms races on slow +# CI runners (broker writes CONNACK after client has already closed, +# leaving the drained PUBLISHes to fail with EPIPE). +timeout 8 ./$sub_bin -T -h 127.0.0.1 -p $port -n "test/offlineq" -q 1 \ + -i "t29_sub" -s -x -C 5000 -R "${TMP_DIR}/t29_sub.ready" \ + >"${TMP_DIR}/t29_sub.log" 2>&1 & +T29_PID=$! +TEST_PIDS+=($T29_PID) +wait_for_file "${TMP_DIR}/t29_sub.ready" 5 +sleep 1 +kill $T29_PID 2>/dev/null +wait $T29_PID 2>/dev/null || true +TEST_PIDS=() +T29_RECV=$(grep -oE 'off_[0-9]+' "${TMP_DIR}/t29_sub.log" 2>/dev/null \ + | wc -l) +if [ "$T29_RECV" -ge 3 ]; then + echo "PASS: Offline queue across reconnect (received=$T29_RECV)" +else + echo "FAIL: Offline queue across reconnect (received=$T29_RECV expected>=3)" + FAIL=1 +fi +fi # has_persist (t29) + +# --- Test 30: Offline queue across broker restart --- +# Same as Test 29 but stops the broker between publish and reconnect to +# confirm NS_OUTQ records replay on restart. 
+echo "" +echo "--- Test 30: Offline queue across broker restart ---" +if [ "$skip_plain" = "yes" ]; then + echo "SKIP: Offline queue restart (plain listener disabled)" +elif [ "$has_persist" = "no" ]; then + echo "SKIP: Offline queue restart (built without --enable-broker-persist)" +elif [ "$has_static_memory" = "yes" ]; then + echo "SKIP: Offline queue restart (orphan/outbound-queue is dynamic-memory only)" +elif [ "$has_persist_encrypt" = "yes" ] && \ + [ "$has_persist_encrypt_dev_key" = "no" ]; then + echo "SKIP: Offline queue restart (encrypt build without dev-key CLI hook)" +else +T30_DIR="${TMP_DIR}/persist_t30" +mkdir -p "$T30_DIR" +if [ $broker_pid != $no_pid ]; then + kill $broker_pid 2>/dev/null + wait $broker_pid 2>/dev/null || true + broker_pid=$no_pid +fi +generate_port +broker_log="${TMP_DIR}/t30_broker1.log" +./$broker_bin -p $port -D "$T30_DIR" $broker_dir_flags >"$broker_log" 2>&1 & +broker_pid=$! +check_broker +rm -f "${TMP_DIR}/t30_first.ready" +./$sub_bin -T -h 127.0.0.1 -p $port -n "test/restartq" -q 1 \ + -i "t30_sub" -s \ + -R "${TMP_DIR}/t30_first.ready" \ + >"${TMP_DIR}/t30_first.log" 2>&1 & +T30_FIRST_PID=$! +TEST_PIDS+=($T30_FIRST_PID) +wait_for_file "${TMP_DIR}/t30_first.ready" 5 +kill -9 $T30_FIRST_PID 2>/dev/null +wait $T30_FIRST_PID 2>/dev/null || true +TEST_PIDS=() +sleep 0.5 +for t30_i in 1 2 3; do + ./$pub_bin -T -h 127.0.0.1 -p $port -n "test/restartq" -q 1 \ + -m "rst_${t30_i}" -i "t30_pub_${t30_i}" \ + >>"${TMP_DIR}/t30_pub.log" 2>&1 +done +sleep 0.5 +# Restart broker against same directory +kill $broker_pid 2>/dev/null +wait $broker_pid 2>/dev/null || true +broker_pid=$no_pid +broker_log="${TMP_DIR}/t30_broker2.log" +./$broker_bin -p $port -D "$T30_DIR" $broker_dir_flags >"$broker_log" 2>&1 & +broker_pid=$! 
+check_broker +T30_REPLAY=no +grep -q "persist restore outq loaded=3" "$broker_log" 2>/dev/null \ + && T30_REPLAY=yes +# Same -C 5000 rationale as Test 29; the restored-from-disk path is +# even more sensitive to a short client timeout. +timeout 8 ./$sub_bin -T -h 127.0.0.1 -p $port -n "test/restartq" -q 1 \ + -i "t30_sub" -s -x -C 5000 -R "${TMP_DIR}/t30_sub.ready" \ + >"${TMP_DIR}/t30_sub.log" 2>&1 & +T30_PID=$! +TEST_PIDS+=($T30_PID) +wait_for_file "${TMP_DIR}/t30_sub.ready" 5 +sleep 1 +kill $T30_PID 2>/dev/null +wait $T30_PID 2>/dev/null || true +TEST_PIDS=() +T30_RECV=$(grep -oE 'rst_[0-9]+' "${TMP_DIR}/t30_sub.log" 2>/dev/null \ + | wc -l) +if [ "$T30_REPLAY" = "yes" ] && [ "$T30_RECV" -ge 3 ]; then + echo "PASS: Offline queue across restart (replay=$T30_REPLAY recv=$T30_RECV)" +else + echo "FAIL: Offline queue across restart (replay=$T30_REPLAY recv=$T30_RECV)" + FAIL=1 +fi +fi # has_persist (t30) + +# --- Test 31: AES-GCM encrypted records at rest --- +# Only meaningful when the broker is built with +# --enable-broker-persist-encrypt. Records on disk must not contain the +# plaintext payload string; round-trip must still work end-to-end. +echo "" +echo "--- Test 31: AES-GCM encrypted persist round-trip ---" +# has_persist_encrypt was detected at startup. 
+if [ "$skip_plain" = "yes" ]; then + echo "SKIP: AES-GCM persist (plain listener disabled)" +elif [ "$has_persist_encrypt" = "no" ]; then + echo "SKIP: AES-GCM persist (built without --enable-broker-persist-encrypt)" +elif [ "$has_persist_encrypt_dev_key" = "no" ]; then + echo "SKIP: AES-GCM persist (built without --enable-broker-persist-encrypt-dev-key)" +elif [ "$has_retained" = "no" ]; then + echo "SKIP: AES-GCM persist (retained support not built)" +else +T31_DIR="${TMP_DIR}/persist_t31" +mkdir -p "$T31_DIR" +if [ $broker_pid != $no_pid ]; then + kill $broker_pid 2>/dev/null + wait $broker_pid 2>/dev/null || true + broker_pid=$no_pid +fi +generate_port +broker_log="${TMP_DIR}/t31_broker1.log" +# -E dev opts into the development hard-coded key. Production builds +# install MqttBrokerPersistHooks.derive_key instead and never reach +# this flag. +./$broker_bin -p $port -D "$T31_DIR" -E dev >"$broker_log" 2>&1 & +broker_pid=$! +check_broker +./$pub_bin -T -h 127.0.0.1 -p $port -n "test/secret_t31" \ + -m "t31_secret_payload" -r >"${TMP_DIR}/t31_pub.log" 2>&1 +sleep 0.3 +# On-disk record must NOT contain the plaintext string. +T31_LEAK=no +if grep -ql "t31_secret_payload" "$T31_DIR"/4/*.bin 2>/dev/null; then + T31_LEAK=yes +fi +# Restart broker - decryption + retained replay should still work. +kill $broker_pid 2>/dev/null +wait $broker_pid 2>/dev/null || true +broker_pid=$no_pid +broker_log="${TMP_DIR}/t31_broker2.log" +./$broker_bin -p $port -D "$T31_DIR" -E dev >"$broker_log" 2>&1 & +broker_pid=$! +check_broker +rm -f "${TMP_DIR}/t31_sub.ready" +timeout 5 ./$sub_bin -T -h 127.0.0.1 -p $port -n "test/secret_t31" \ + -i "t31_sub" -R "${TMP_DIR}/t31_sub.ready" \ + >"${TMP_DIR}/t31_sub.log" 2>&1 & +T31_PID=$! 
+TEST_PIDS+=($T31_PID) +wait_for_file "${TMP_DIR}/t31_sub.ready" 3 +sleep 0.5 +kill $T31_PID 2>/dev/null +wait $T31_PID 2>/dev/null || true +TEST_PIDS=() +T31_RECV=no +grep -q "t31_secret_payload" "${TMP_DIR}/t31_sub.log" 2>/dev/null \ + && T31_RECV=yes +if [ "$T31_LEAK" = "no" ] && [ "$T31_RECV" = "yes" ]; then + echo "PASS: AES-GCM persist (no-leak + decrypt round-trip)" +else + echo "FAIL: AES-GCM persist (leak=$T31_LEAK recv=$T31_RECV)" + FAIL=1 +fi +fi # has_persist_encrypt + +# --- Test 32: Schema-mismatch wipe deletes every stale file --- +# Test 28 already verifies that a single bogus META makes the broker +# overwrite META, but doesn't check that OTHER namespaces get cleaned. +# This test plants stray files in every namespace and confirms they all +# disappear after the broker comes up. +echo "" +echo "--- Test 32: Schema mismatch wipes every namespace ---" +if [ "$skip_plain" = "yes" ]; then + echo "SKIP: Schema wipe full (plain listener disabled)" +elif [ "$has_persist" = "no" ]; then + echo "SKIP: Schema wipe full (built without --enable-broker-persist)" +elif [ "$has_persist_encrypt" = "yes" ]; then + echo "SKIP: Schema wipe full (encrypted build - exercised by plaintext matrix entry)" +elif [ "$has_static_memory" = "yes" ]; then + echo "SKIP: Schema wipe full (active wipe skipped in static-memory mode by design)" +else +T32_DIR="${TMP_DIR}/persist_t32" +rm -rf "$T32_DIR" +mkdir -p "$T32_DIR/1" "$T32_DIR/2" "$T32_DIR/3" "$T32_DIR/4" "$T32_DIR/5" +# Bogus META with wrong schema version (0xFFFF). +printf 'WMQB\xff\xff\x00\x01\x00\x00\x00\x04zzzz' > "$T32_DIR/1/00.bin" +# Plant a stray file in each other namespace. 
+echo "stray-session" > "$T32_DIR/2/aaaa.bin" +echo "stray-subs" > "$T32_DIR/3/bbbb.bin" +echo "stray-ret" > "$T32_DIR/4/cccc.bin" +echo "stray-outq" > "$T32_DIR/5/dddd.bin" +T32_BEFORE=$(find "$T32_DIR" -type f -name '*.bin' | wc -l) +if [ $broker_pid != $no_pid ]; then + kill $broker_pid 2>/dev/null + wait $broker_pid 2>/dev/null || true + broker_pid=$no_pid +fi +generate_port +broker_log="${TMP_DIR}/t32_broker.log" +./$broker_bin -p $port -D "$T32_DIR" $broker_dir_flags >"$broker_log" 2>&1 & +broker_pid=$! +check_broker +# After wipe-and-restart only the fresh META record should remain. +T32_AFTER=$(find "$T32_DIR" -type f -name '*.bin' | wc -l) +T32_WIPED=no +grep -q "persist wipe deleted" "$broker_log" 2>/dev/null \ + && T32_WIPED=yes +if [ "$T32_BEFORE" -eq 5 ] && [ "$T32_AFTER" -eq 1 ] && \ + [ "$T32_WIPED" = "yes" ]; then + echo "PASS: Schema wipe (before=$T32_BEFORE after=$T32_AFTER)" +else + echo "FAIL: Schema wipe (before=$T32_BEFORE after=$T32_AFTER wiped=$T32_WIPED)" + FAIL=1 +fi +fi # has_persist (t32) + # --- WebSocket Tests --- ws_client_bin="examples/websocket/websocket_client" has_websocket=no diff --git a/src/include.am b/src/include.am index 61283ab8..f6160f12 100644 --- a/src/include.am +++ b/src/include.am @@ -22,7 +22,9 @@ EXTRA_DIST += if BUILD_BROKER bin_PROGRAMS += src/mqtt_broker -src_mqtt_broker_SOURCES = src/mqtt_broker.c +src_mqtt_broker_SOURCES = src/mqtt_broker.c \ + src/mqtt_broker_persist.c \ + src/mqtt_broker_persist_posix.c src_mqtt_broker_CFLAGS = $(AM_CFLAGS) src_mqtt_broker_CPPFLAGS = $(AM_CPPFLAGS) src_mqtt_broker_LDFLAGS = -Lsrc diff --git a/src/mqtt_broker.c b/src/mqtt_broker.c index 19980930..3f255616 100644 --- a/src/mqtt_broker.c +++ b/src/mqtt_broker.c @@ -1219,6 +1219,12 @@ static void BrokerWs_Free(MqttBroker* broker) #endif /* ENABLE_MQTT_WEBSOCKET */ +/* BrokerNextPacketId forward declaration. The body lives in the broker + * core section below. 
Used by both the WebSocket branch and the orphan + * enqueue helpers earlier in the file, so the forward decl is hoisted + * out of the ENABLE_MQTT_WEBSOCKET guard. */ +static word16 BrokerNextPacketId(MqttBroker* broker); + /* -------------------------------------------------------------------------- */ /* Per-client MqttNet callbacks (route through MqttBrokerNet) */ /* -------------------------------------------------------------------------- */ @@ -1432,6 +1438,400 @@ static void BrokerInboundQos2_Clear(BrokerClient* bc) } #endif /* WOLFMQTT_MAX_QOS >= 2 */ +#ifndef WOLFMQTT_STATIC_MEMORY +/* -------------------------------------------------------------------------- */ +/* Per-subscriber outbound publish queue (dynamic memory only). + * + * Adds the message-shaping layer asked for in customer report #7 (ordered + * delivery) and consumed later by report-#5 follow-up work and PR2's + * offline queue. Fan-out enqueues; drain dispatches up to the inflight + * cap. Drain is also called from PUBACK / PUBREC / PUBCOMP handlers and + * once per select() tick so a slow subscriber that just opened a window + * gets unblocked promptly. */ +/* -------------------------------------------------------------------------- */ + +/* Free a single queue entry (topic, payload, the entry itself); NULL-safe. */ +static void BrokerOutPub_Free(BrokerOutPub* e) +{ + if (e == NULL) { + return; + } + if (e->topic != NULL) { + WOLFMQTT_FREE(e->topic); + e->topic = NULL; + } + if (e->payload != NULL) { + WOLFMQTT_FREE(e->payload); + e->payload = NULL; + } + WOLFMQTT_FREE(e); +} + +/* Allocate a new entry holding a deep copy of topic + payload. Returns NULL + * if topic is NULL or an allocation fails (caller decides whether that means + * drop or close). Remaining fields are zeroed; caller fills qos / packet_id / + * etc. and links into out_q via BrokerClient_EnqueueOutPub. 
*/ +static BrokerOutPub* BrokerOutPub_Alloc(const char* topic, + const byte* payload, word32 payload_len) +{ + BrokerOutPub* e; + size_t topic_len; + + if (topic == NULL) { + return NULL; + } + topic_len = XSTRLEN(topic); + + e = (BrokerOutPub*)WOLFMQTT_MALLOC(sizeof(BrokerOutPub)); + if (e == NULL) { + return NULL; + } + XMEMSET(e, 0, sizeof(*e)); + + e->topic = (char*)WOLFMQTT_MALLOC(topic_len + 1); + if (e->topic == NULL) { + BrokerOutPub_Free(e); + return NULL; + } + XMEMCPY(e->topic, topic, topic_len); + e->topic[topic_len] = '\0'; + + if (payload_len > 0 && payload != NULL) { + e->payload = (byte*)WOLFMQTT_MALLOC(payload_len); + if (e->payload == NULL) { + BrokerOutPub_Free(e); + return NULL; + } + XMEMCPY(e->payload, payload, payload_len); + e->payload_len = payload_len; + } + return e; +} + +/* Append e to the subscriber's out_q tail. Caller is responsible for + * counting against any caps before allocation. */ +static void BrokerClient_EnqueueOutPub(BrokerClient* bc, BrokerOutPub* e) +{ + if (bc == NULL || e == NULL) { + return; + } + e->next = NULL; + if (bc->out_q_tail == NULL) { + bc->out_q_head = e; + bc->out_q_tail = e; + } + else { + bc->out_q_tail->next = e; + bc->out_q_tail = e; + } + bc->out_q_count++; +} + +/* Walk out_q and free every entry. Called from BrokerClient_Free. */ +static void BrokerClient_FreeOutQueue(BrokerClient* bc) +{ + BrokerOutPub* cur; + + if (bc == NULL) { + return; + } + cur = bc->out_q_head; + while (cur != NULL) { + BrokerOutPub* next = cur->next; + BrokerOutPub_Free(cur); + cur = next; + } + bc->out_q_head = NULL; + bc->out_q_tail = NULL; + bc->out_q_count = 0; + bc->out_q_inflight = 0; +} + +/* Send as many QUEUED entries from out_q as the inflight cap allows. + * + * Ordering: walks from out_q_head, never reorders. Already-sent entries + * (state != QUEUED) are stepped over - their PUBLISH already hit the + * wire in publish order. 
The cap stops the drain at the first QUEUED
+ * QoS>0 entry that would exceed BROKER_MAX_INFLIGHT_PER_SUB (or the v5
+ * client's Receive Maximum, whichever is smaller). [MQTT-4.6.0-3] is
+ * preserved: even a QUEUED QoS 0 behind a capped QoS>0 stays put.
+ *
+ * bc: subscriber whose out_q is drained; NULL or an empty queue is a
+ *     no-op.
+ *
+ * No return value. Per-entry outcomes: an entry that fails to encode is
+ * unlinked and freed; a failed socket write leaves the entry QUEUED and
+ * ends the drain; a sent QoS 0 entry is freed; a sent QoS>0 entry stays
+ * linked in PUBLISH_SENT and is counted in out_q_inflight. */
+static void BrokerClient_DrainOutQueue(BrokerClient* bc)
+{
+    BrokerOutPub* cur;
+    BrokerOutPub* prev;
+    int effective_cap;
+
+    if (bc == NULL || bc->out_q_head == NULL) {
+        return;
+    }
+
+    /* Start from the build-time cap; a non-zero client_receive_max can
+     * only lower it. Zero is treated as "no client-supplied limit". */
+    effective_cap = BROKER_MAX_INFLIGHT_PER_SUB;
+    if (bc->client_receive_max != 0 &&
+        (int)bc->client_receive_max < effective_cap) {
+        effective_cap = (int)bc->client_receive_max;
+    }
+
+    /* prev always trails the last entry that is still linked, so every
+     * unlink below can be done without re-walking the list. */
+    prev = NULL;
+    cur = bc->out_q_head;
+    while (cur != NULL) {
+        MqttPublish out_pub;
+        int enc_rc;
+
+        /* Not QUEUED: leave the entry linked and step past it. */
+        if (cur->state != BROKER_OUTQ_QUEUED) {
+            prev = cur;
+            cur = cur->next;
+            continue;
+        }
+        if (cur->qos > MQTT_QOS_0 && bc->out_q_inflight >= effective_cap) {
+            /* Cap reached. Stop here - cannot send anything behind it
+             * either, because [MQTT-4.6.0-3] requires ordered delivery. */
+            break;
+        }
+
+        /* out_pub only borrows cur's topic/payload pointers; nothing is
+         * copied for the encode. */
+        XMEMSET(&out_pub, 0, sizeof(out_pub));
+        out_pub.topic_name = cur->topic;
+        out_pub.qos = cur->qos;
+        out_pub.packet_id = cur->packet_id;
+        out_pub.retain = cur->retain;
+        /* MQTT-4.4.0-1: DUP=1 on re-send of an unacked PUBLISH after
+         * session resumption. Set by BrokerOrphan_Reclaim when an
+         * entry was previously in PUBLISH_SENT and got reset to
+         * QUEUED here for retransmit. */
+        out_pub.duplicate = cur->retransmit_dup;
+        out_pub.buffer = cur->payload;
+        out_pub.total_len = cur->payload_len;
+    #ifdef WOLFMQTT_V5
+        out_pub.protocol_level = cur->protocol_level;
+    #endif
+
+        enc_rc = MqttEncode_Publish(bc->tx_buf, BROKER_CLIENT_TX_SZ(bc),
+            &out_pub, 0);
+        if (enc_rc <= 0) {
+            WBLOG_ERR(bc->broker,
+                "broker: drain encode failed sock=%d topic=%s rc=%d",
+                (int)bc->sock, cur->topic, enc_rc);
+            /* Drop just this entry and continue. Encoding failure for
+             * a single message is not fatal to the connection. */
+            if (prev == NULL) {
+                bc->out_q_head = cur->next;
+            }
+            else {
+                prev->next = cur->next;
+            }
+            if (bc->out_q_tail == cur) {
+                bc->out_q_tail = prev;
+            }
+            bc->out_q_count--;
+            {
+                /* Advance cur before freeing so the loop never touches
+                 * the released node; prev is deliberately unchanged. */
+                BrokerOutPub* free_me = cur;
+                cur = cur->next;
+            #ifdef WOLFMQTT_BROKER_PERSIST
+                /* If this entry was previously shadow-written (orphan
+                 * path), drop the disk record now so a future restart
+                 * cannot replay an undeliverable message. Idempotent
+                 * for entries that were never persisted (QoS 0, or
+                 * not-yet-orphaned). */
+                if (free_me->qos > MQTT_QOS_0 &&
+                    BROKER_STR_VALID(bc->client_id)) {
+                    (void)BrokerPersist_DelOutPub(bc->broker,
+                        bc->client_id, free_me->packet_id);
+                }
+            #endif
+                BrokerOutPub_Free(free_me);
+            }
+            continue;
+        }
+        {
+            int wr_rc;
+            wr_rc = MqttPacket_Write(&bc->client, bc->tx_buf, enc_rc);
+            if (wr_rc < 0) {
+                /* Socket dropped (EPIPE/ECONNRESET/etc). Leave this
+                 * entry in QUEUED state, do not advance, do not bump
+                 * inflight. The broker's read path will detect the
+                 * close on the next step and re-orphan the client;
+                 * still-QUEUED entries follow the orphan and replay
+                 * on the next reconnect. Subsequent writes on the
+                 * same dead socket would just stack more errors, so
+                 * stop the drain here. */
+                WBLOG_ERR(bc->broker,
+                    "broker: drain write failed sock=%d topic=%s rc=%d",
+                    (int)bc->sock, cur->topic, wr_rc);
+                return;
+            }
+        }
+        WBLOG_DBG(bc->broker,
+            "broker: drain send sock=%d topic=%s qos=%d packet_id=%u dup=%d",
+            (int)bc->sock, cur->topic, (int)cur->qos,
+            (unsigned)cur->packet_id, (int)cur->retransmit_dup);
+        /* Flag consumed: clear it now that the PUBLISH has been written. */
+        cur->retransmit_dup = 0;
+
+        if (cur->qos == MQTT_QOS_0) {
+            /* QoS 0 needs no ack: unlink and free right after the write. */
+            BrokerOutPub* free_me = cur;
+            if (prev == NULL) {
+                bc->out_q_head = cur->next;
+            }
+            else {
+                prev->next = cur->next;
+            }
+            if (bc->out_q_tail == cur) {
+                bc->out_q_tail = prev;
+            }
+            bc->out_q_count--;
+            cur = cur->next;
+            BrokerOutPub_Free(free_me);
+            /* prev unchanged */
+        }
+        else {
+            /* QoS 1/2: keep the entry linked until the ack handlers
+             * unlink it, and count it against the inflight cap. */
+            cur->state = BROKER_OUTQ_PUBLISH_SENT;
+            bc->out_q_inflight++;
+            prev = cur;
+            cur = cur->next;
+        }
+    }
+}
+
+/* Locate the queue entry that matches packet_id and is awaiting an ack
+ * in the given expected_state. Returns NULL if no match (e.g., spurious
+ * ack, or our state has already moved on). On match returns the entry
+ * (still linked) and sets *out_prev to the predecessor (or NULL when
+ * the match is at head) so the caller can unlink in O(1).
+ *
+ * bc:             client whose out_q is searched; NULL returns NULL.
+ * packet_id:      id to match; 0 never matches and returns NULL.
+ * expected_state: entry state required for a match.
+ * out_prev:       optional (may be NULL); written only on a match. */
+static BrokerOutPub* BrokerClient_FindOutPub(BrokerClient* bc,
+    word16 packet_id, byte expected_state, BrokerOutPub** out_prev)
+{
+    BrokerOutPub* prev = NULL;
+    BrokerOutPub* cur;
+
+    if (bc == NULL || packet_id == 0) {
+        return NULL;
+    }
+    cur = bc->out_q_head;
+    while (cur != NULL) {
+        /* First entry from the head satisfying both conditions wins. */
+        if (cur->packet_id == packet_id && cur->state == expected_state) {
+            if (out_prev != NULL) {
+                *out_prev = prev;
+            }
+            return cur;
+        }
+        prev = cur;
+        cur = cur->next;
+    }
+    return NULL;
+}
+
+/* Unlink and free the entry; decrement inflight if it was counted.
*/ +static void BrokerClient_UnlinkOutPub(BrokerClient* bc, BrokerOutPub* prev, + BrokerOutPub* e) +{ + if (bc == NULL || e == NULL) { + return; + } + if (prev == NULL) { + bc->out_q_head = e->next; + } + else { + prev->next = e->next; + } + if (bc->out_q_tail == e) { + bc->out_q_tail = prev; + } + bc->out_q_count--; + if (e->state == BROKER_OUTQ_PUBLISH_SENT || + e->state == BROKER_OUTQ_PUBREL_SENT) { + if (bc->out_q_inflight > 0) { + bc->out_q_inflight--; + } + } + BrokerOutPub_Free(e); +} + +/* PUBACK from subscriber - completes a QoS 1 delivery. */ +static void BrokerClient_OnPubAck(BrokerClient* bc, word16 packet_id) +{ + BrokerOutPub* prev = NULL; + BrokerOutPub* e; + + if (bc == NULL) { + return; + } + e = BrokerClient_FindOutPub(bc, packet_id, BROKER_OUTQ_PUBLISH_SENT, + &prev); + if (e == NULL) { + WBLOG_DBG(bc->broker, + "broker: spurious PUBACK sock=%d packet_id=%u", + (int)bc->sock, (unsigned)packet_id); + return; + } + BrokerClient_UnlinkOutPub(bc, prev, e); +#ifdef WOLFMQTT_BROKER_PERSIST + /* Defense in depth: in normal flow the orphan-reclaim path already + * wiped this client's disk records, but if the entry was ever + * shadow-written (e.g., a previous orphan cycle) make sure the + * on-disk record is gone so a crash before the next reclaim cannot + * cause duplicate redelivery. Idempotent on a missing key. */ + if (BROKER_STR_VALID(bc->client_id)) { + (void)BrokerPersist_DelOutPub(bc->broker, bc->client_id, packet_id); + } +#endif + BrokerClient_DrainOutQueue(bc); +} + +#if WOLFMQTT_MAX_QOS >= 2 +/* PUBREC from subscriber - advance the QoS 2 entry to PUBREL_SENT. + * Returns 1 if a matching entry was found (so the caller knows whether + * the PUBREL we send is correlated to a real outbound message), 0 + * otherwise. The wire response is still sent in both cases to remain + * idempotent for buggy peers. 
*/ +static int BrokerClient_OnPubRec(BrokerClient* bc, word16 packet_id) +{ + BrokerOutPub* prev = NULL; + BrokerOutPub* e; + + if (bc == NULL) { + return 0; + } + e = BrokerClient_FindOutPub(bc, packet_id, BROKER_OUTQ_PUBLISH_SENT, + &prev); + if (e == NULL) { + WBLOG_DBG(bc->broker, + "broker: spurious PUBREC sock=%d packet_id=%u", + (int)bc->sock, (unsigned)packet_id); + return 0; + } + e->state = BROKER_OUTQ_PUBREL_SENT; + /* Inflight stays counted - the delivery is still outstanding until + * PUBCOMP returns. */ + return 1; +} + +/* PUBCOMP from subscriber - completes a QoS 2 delivery. */ +static void BrokerClient_OnPubComp(BrokerClient* bc, word16 packet_id) +{ + BrokerOutPub* prev = NULL; + BrokerOutPub* e; + + if (bc == NULL) { + return; + } + e = BrokerClient_FindOutPub(bc, packet_id, BROKER_OUTQ_PUBREL_SENT, + &prev); + if (e == NULL) { + WBLOG_DBG(bc->broker, + "broker: spurious PUBCOMP sock=%d packet_id=%u", + (int)bc->sock, (unsigned)packet_id); + return; + } + BrokerClient_UnlinkOutPub(bc, prev, e); +#ifdef WOLFMQTT_BROKER_PERSIST + /* See BrokerClient_OnPubAck: defense-in-depth disk record purge. */ + if (BROKER_STR_VALID(bc->client_id)) { + (void)BrokerPersist_DelOutPub(bc->broker, bc->client_id, packet_id); + } +#endif + BrokerClient_DrainOutQueue(bc); +} +#endif /* WOLFMQTT_MAX_QOS >= 2 */ +#endif /* !WOLFMQTT_STATIC_MEMORY */ + static void BrokerClient_Free(BrokerClient* bc) { if (bc == NULL) { @@ -1440,6 +1840,9 @@ static void BrokerClient_Free(BrokerClient* bc) #if WOLFMQTT_MAX_QOS >= 2 BrokerInboundQos2_Clear(bc); #endif +#ifndef WOLFMQTT_STATIC_MEMORY + BrokerClient_FreeOutQueue(bc); +#endif #ifdef ENABLE_MQTT_WEBSOCKET if (bc->ws_ctx != NULL) { @@ -1643,6 +2046,410 @@ static void BrokerClient_Remove(MqttBroker* broker, BrokerClient* bc) /* Orphan subscriptions for session persistence (clean_session=0). * Sets client pointer to NULL but keeps the subscription for reconnect. 
*/ +#ifndef WOLFMQTT_STATIC_MEMORY +/* -------------------------------------------------------------------------- */ +/* Orphan session pool (dynamic memory only). */ +/* -------------------------------------------------------------------------- */ + +/* Find the orphan slot whose client_id matches. NULL if none. */ +static BrokerOrphanSession* BrokerOrphan_Find(MqttBroker* broker, + const char* client_id) +{ + BrokerOrphanSession* cur; + if (broker == NULL || client_id == NULL) { + return NULL; + } + for (cur = broker->orphan_sessions; cur != NULL; cur = cur->next) { + if (cur->client_id != NULL && + XSTRCMP(cur->client_id, client_id) == 0) { + return cur; + } + } + return NULL; +} + +/* Free everything an orphan owns (queue entries + client_id) but do + * NOT unlink from broker->orphan_sessions; the caller does that. */ +static void BrokerOrphan_FreeContents(BrokerOrphanSession* o) +{ + BrokerOutPub* cur; + if (o == NULL) { + return; + } + cur = o->out_q_head; + while (cur != NULL) { + BrokerOutPub* next = cur->next; + if (cur->topic != NULL) { + WOLFMQTT_FREE(cur->topic); + } + if (cur->payload != NULL) { + WOLFMQTT_FREE(cur->payload); + } + WOLFMQTT_FREE(cur); + cur = next; + } + o->out_q_head = NULL; + o->out_q_tail = NULL; + o->out_q_count = 0; + o->out_q_inflight = 0; + if (o->client_id != NULL) { + WOLFMQTT_FREE(o->client_id); + o->client_id = NULL; + } +} + +/* Unlink + free a single orphan from broker->orphan_sessions. */ +static void BrokerOrphan_Remove(MqttBroker* broker, BrokerOrphanSession* o) +{ + BrokerOrphanSession** pp; + if (broker == NULL || o == NULL) { + return; + } + pp = &broker->orphan_sessions; + while (*pp != NULL && *pp != o) { + pp = &(*pp)->next; + } + if (*pp == o) { + *pp = o->next; + if (broker->orphan_session_count > 0) { + broker->orphan_session_count--; + } + } + BrokerOrphan_FreeContents(o); + WOLFMQTT_FREE(o); +} + +/* Drop the oldest orphan (smallest orphan_since) and its subs+persist. 
+ * Returns 1 if one was dropped, 0 if pool was empty.
+ *
+ * Complexity is O(N) over the orphan pool because the list is singly
+ * linked and we scan for the minimum orphan_since timestamp. With the
+ * default BROKER_MAX_PERSIST_SESSIONS = 64 this is in the noise.
+ * Operators raising the cap into the thousands should swap this for a
+ * doubly-linked LRU (next/prev pointers and an oldest-tail pointer on
+ * MqttBroker) so eviction stays O(1).
+ *
+ * NOTE(review): BrokerOrphan_DropFull is defined after this function;
+ * this relies on a prototype being visible from a header - confirm. */
+static int BrokerOrphan_EvictOldest(MqttBroker* broker)
+{
+    BrokerOrphanSession* cur;
+    BrokerOrphanSession* oldest = NULL;
+    if (broker == NULL) {
+        return 0;
+    }
+    /* Strict '<' keeps the first-encountered slot when timestamps tie. */
+    for (cur = broker->orphan_sessions; cur != NULL; cur = cur->next) {
+        if (oldest == NULL || cur->orphan_since < oldest->orphan_since) {
+            oldest = cur;
+        }
+    }
+    if (oldest == NULL) {
+        return 0;
+    }
+    WBLOG_INFO(broker,
+        "broker: evicting oldest orphan client_id=%s (cap reached)",
+        BROKER_STR_VALID(oldest->client_id) ? oldest->client_id
+                                            : "(null)");
+    BrokerOrphan_DropFull(broker, oldest);
+    return 1;
+}
+
+/* Shared orphan teardown (callable from broker_persist.c too, hence
+ * WOLFMQTT_LOCAL linkage). Deletes persisted records, drops the
+ * orphan's still-NULL-bound subs from broker->subs, and unlinks +
+ * frees the orphan slot itself. Caller already validated that o is
+ * actually linked into broker->orphan_sessions.
+ *
+ * broker: owning broker; NULL is a no-op.
+ * o:      orphan to destroy; NULL is a no-op. Invalid after return. */
+WOLFMQTT_LOCAL void BrokerOrphan_DropFull(MqttBroker* broker,
+    BrokerOrphanSession* o)
+{
+    if (broker == NULL || o == NULL) {
+        return;
+    }
+#ifdef WOLFMQTT_BROKER_PERSIST
+    if (o->client_id != NULL) {
+        (void)BrokerPersist_DelSubs(broker, o->client_id);
+        (void)BrokerPersist_DelSession(broker, o->client_id);
+        (void)BrokerPersist_DelOutQueue(broker, o->client_id);
+    }
+#endif
+    /* Drop the orphan's subs entirely (their session is gone). */
+    {
+        BrokerSub* sp = broker->subs;
+        BrokerSub* prev = NULL;
+        while (sp != NULL) {
+            BrokerSub* next = sp->next;
+            /* Only detached subs (client == NULL) with this orphan's
+             * client_id are removed; subs bound to a live client are
+             * left alone even if the id matches. */
+            if (sp->client == NULL && sp->client_id != NULL &&
+                o->client_id != NULL &&
+                XSTRCMP(sp->client_id, o->client_id) == 0) {
+                if (prev != NULL) {
+                    prev->next = next;
+                }
+                else {
+                    broker->subs = next;
+                }
+                if (sp->filter) {
+                    WOLFMQTT_FREE(sp->filter);
+                }
+                if (sp->client_id) {
+                    WOLFMQTT_FREE(sp->client_id);
+                }
+                WOLFMQTT_FREE(sp);
+            }
+            else {
+                prev = sp;
+            }
+            sp = next;
+        }
+    }
+    /* Must come last: this frees o->client_id, which the loop above
+     * still compares against. */
+    BrokerOrphan_Remove(broker, o);
+}
+
+/* Allocate or recycle an orphan slot, then transfer the persistent
+ * state of bc into it. Subs already point at NULL (caller handled);
+ * the out_q on bc is unlinked from bc before this returns so
+ * BrokerClient_Free does not free it.
+ *
+ * Returns the new orphan (linked at the head of
+ * broker->orphan_sessions), or NULL when an argument is NULL, bc has
+ * no valid client_id, or an allocation fails. On failure bc keeps its
+ * out_q.
+ *
+ * NOTE(review): removing the existing orphan and evicting at the cap
+ * both happen before the allocations, so an allocation failure still
+ * costs those sessions - confirm this is acceptable. */
+static BrokerOrphanSession* BrokerOrphan_Take(MqttBroker* broker,
+    BrokerClient* bc)
+{
+    BrokerOrphanSession* o;
+    BrokerOrphanSession* existing;
+    size_t cid_len;
+
+    if (broker == NULL || bc == NULL || !BROKER_STR_VALID(bc->client_id)) {
+        return NULL;
+    }
+
+    /* If an orphan already exists for this client_id (e.g., the same
+     * client took over its own session via duplicate CONNECT), drop it
+     * before staging the new orphan. */
+    existing = BrokerOrphan_Find(broker, bc->client_id);
+    if (existing != NULL) {
+        BrokerOrphan_Remove(broker, existing);
+    }
+
+    /* Cap check; evict oldest if at limit. */
+    while (broker->orphan_session_count >= BROKER_MAX_PERSIST_SESSIONS) {
+        /* Eviction reports 0 once the pool is empty; stop then so the
+         * loop cannot spin. */
+        if (!BrokerOrphan_EvictOldest(broker)) {
+            break;
+        }
+    }
+
+    o = (BrokerOrphanSession*)WOLFMQTT_MALLOC(sizeof(*o));
+    if (o == NULL) {
+        return NULL;
+    }
+    XMEMSET(o, 0, sizeof(*o));
+    cid_len = XSTRLEN(bc->client_id);
+    o->client_id = (char*)WOLFMQTT_MALLOC(cid_len + 1);
+    if (o->client_id == NULL) {
+        WOLFMQTT_FREE(o);
+        return NULL;
+    }
+    XMEMCPY(o->client_id, bc->client_id, cid_len);
+    o->client_id[cid_len] = '\0';
+
+    o->protocol_level = bc->protocol_level;
+    o->session_expiry_sec = bc->session_expiry_sec;
+    o->orphan_since = WOLFMQTT_BROKER_GET_TIME_S();
+
+    /* Move out_q ownership. bc->out_q_* must be cleared so
+     * BrokerClient_FreeOutQueue (called from BrokerClient_Free)
+     * doesn't double-free. */
+    o->out_q_head = bc->out_q_head;
+    o->out_q_tail = bc->out_q_tail;
+    o->out_q_count = bc->out_q_count;
+    o->out_q_inflight = bc->out_q_inflight;
+    bc->out_q_head = NULL;
+    bc->out_q_tail = NULL;
+    bc->out_q_count = 0;
+    bc->out_q_inflight = 0;
+
+    /* Link at head; orphan_session_count tracks size. */
+    o->next = broker->orphan_sessions;
+    broker->orphan_sessions = o;
+    broker->orphan_session_count++;
+    WBLOG_INFO(broker,
+        "broker: orphan session created client_id=%s queued=%d",
+        o->client_id, o->out_q_count);
+
+#ifdef WOLFMQTT_BROKER_PERSIST
+    /* Shadow-write every transferred QoS 1/2 entry so the queue
+     * survives a broker restart. QoS 0 entries (if any leaked into
+     * the queue) are skipped by PutOutPub. */
+    {
+        BrokerOutPub* cur;
+        for (cur = o->out_q_head; cur != NULL; cur = cur->next) {
+            if (cur->qos > MQTT_QOS_0) {
+                (void)BrokerPersist_PutOutPub(broker, o->client_id, cur);
+            }
+        }
+    }
+#endif
+    return o;
+}
+
+/* On reconnect with same client_id: transfer the orphan's queue back
+ * to the new live BrokerClient and remove the orphan. Returns 1 if an
+ * orphan was consumed, 0 otherwise.
*/ +static int BrokerOrphan_Reclaim(MqttBroker* broker, BrokerClient* new_bc) +{ + BrokerOrphanSession* o; + if (broker == NULL || new_bc == NULL || + !BROKER_STR_VALID(new_bc->client_id)) { + return 0; + } + o = BrokerOrphan_Find(broker, new_bc->client_id); + if (o == NULL) { + return 0; + } + /* Move queue ownership back. The new bc's own out_q is expected + * to be empty at this point (fresh BrokerClient post-CONNECT). */ + new_bc->out_q_head = o->out_q_head; + new_bc->out_q_tail = o->out_q_tail; + new_bc->out_q_count = o->out_q_count; + new_bc->out_q_inflight = 0; + o->out_q_head = NULL; + o->out_q_tail = NULL; + o->out_q_count = 0; + o->out_q_inflight = 0; + if (new_bc->session_expiry_sec == 0xFFFFFFFFu) { + new_bc->session_expiry_sec = o->session_expiry_sec; + } + /* MQTT-4.4.0-1: any PUBLISH that was previously in-flight on the + * old session is re-sent with DUP=1. Reset PUBLISH_SENT -> QUEUED + * here and mark retransmit_dup so the drain emits the correct + * flag on first re-send. PUBREL_SENT stays as-is: the resend of + * a PUBREL is a fresh PUBREL with the same packet_id and carries + * no DUP semantics (PUBREL has no flag for it). The drain handles + * PUBREL_SENT today by leaving it untouched; the broker re-emits + * PUBREL on receipt of a duplicate PUBREC (existing path). */ + { + BrokerOutPub* e = new_bc->out_q_head; + int retx = 0; + while (e != NULL) { + if (e->state == BROKER_OUTQ_PUBLISH_SENT) { + e->state = BROKER_OUTQ_QUEUED; + e->retransmit_dup = 1; + retx++; + } + else if (e->state == BROKER_OUTQ_PUBREL_SENT) { + /* Still in flight by definition - the prior session + * was awaiting PUBCOMP. Restore the inflight count + * (zeroed above) so the Receive Maximum / + * BROKER_MAX_INFLIGHT_PER_SUB cap stays accurate. 
*/ + new_bc->out_q_inflight++; + } + e = e->next; + } + if (retx > 0) { + WBLOG_INFO(broker, + "broker: orphan reclaim queued retransmit=%d client_id=%s", + retx, new_bc->client_id); + } + } + WBLOG_INFO(broker, + "broker: orphan reclaimed client_id=%s queued=%d", + new_bc->client_id, new_bc->out_q_count); +#ifdef WOLFMQTT_BROKER_PERSIST + /* The reclaimed queue is now in a LIVE BrokerClient. Persisted + * records for this client_id are no longer authoritative - the + * subscriber will receive these via the upcoming drain and ack + * them. Wipe the on-disk copies so a subsequent crash doesn't + * re-deliver them. */ + (void)BrokerPersist_DelOutQueue(broker, new_bc->client_id); +#endif + BrokerOrphan_Remove(broker, o); + return 1; +} + +/* Enqueue a fan-out target onto an orphan session's queue. Called from + * BrokerHandle_Publish when sub->client is NULL but an orphan with the + * matching client_id exists. QoS 0 is dropped per spec; only persistent + * messages live in the offline queue. */ +static void BrokerOrphan_Enqueue(MqttBroker* broker, BrokerOrphanSession* o, + const char* topic, const byte* payload, word32 payload_len, + MqttQoS qos, byte retain) +{ + BrokerOutPub* e; + if (broker == NULL || o == NULL || topic == NULL || + qos == MQTT_QOS_0) { + return; + } + /* Drop-oldest eviction when the per-session offline queue is full. 
*/ + while (o->out_q_count >= BROKER_MAX_OFFLINE_MSGS_PER_SUB) { + BrokerOutPub* head = o->out_q_head; + if (head == NULL) { + break; + } + o->out_q_head = head->next; + if (o->out_q_tail == head) { + o->out_q_tail = NULL; + } + if (o->out_q_count > 0) { + o->out_q_count--; + } + #ifdef WOLFMQTT_BROKER_PERSIST + if (o->client_id != NULL && head->packet_id != 0) { + (void)BrokerPersist_DelOutPub(broker, o->client_id, + head->packet_id); + } + #endif + if (head->topic) WOLFMQTT_FREE(head->topic); + if (head->payload) WOLFMQTT_FREE(head->payload); + WOLFMQTT_FREE(head); + } + + e = BrokerOutPub_Alloc(topic, payload, payload_len); + if (e == NULL) { + WBLOG_ERR(broker, + "broker: orphan enqueue alloc failed client_id=%s", + BROKER_STR_VALID(o->client_id) ? o->client_id : "(null)"); + return; + } + e->qos = qos; + e->packet_id = BrokerNextPacketId(broker); + e->retain = retain; + e->state = BROKER_OUTQ_QUEUED; + e->enq_time = WOLFMQTT_BROKER_GET_TIME_S(); + e->protocol_level = o->protocol_level; + e->next = NULL; + if (o->out_q_tail != NULL) { + o->out_q_tail->next = e; + } + else { + o->out_q_head = e; + } + o->out_q_tail = e; + o->out_q_count++; +#ifdef WOLFMQTT_BROKER_PERSIST + (void)BrokerPersist_PutOutPub(broker, o->client_id, e); +#endif + WBLOG_DBG(broker, + "broker: orphan enqueue client_id=%s topic=%s qos=%d count=%d", + BROKER_STR_VALID(o->client_id) ? o->client_id : "(null)", + topic, (int)qos, o->out_q_count); +} + +/* Free every orphan (used by MqttBroker_Free and by wipe paths). 
*/ +static void BrokerOrphan_FreeAll(MqttBroker* broker) +{ + BrokerOrphanSession* cur; + if (broker == NULL) { + return; + } + cur = broker->orphan_sessions; + while (cur != NULL) { + BrokerOrphanSession* next = cur->next; + BrokerOrphan_FreeContents(cur); + WOLFMQTT_FREE(cur); + cur = next; + } + broker->orphan_sessions = NULL; + broker->orphan_session_count = 0; +} +#endif /* !WOLFMQTT_STATIC_MEMORY */ + +/* Forward declaration; orphan-take-failure rollback in + * BrokerSubs_OrphanClient falls back to the clean removal path. */ +static void BrokerSubs_RemoveClient(MqttBroker* broker, BrokerClient* bc); + static void BrokerSubs_OrphanClient(MqttBroker* broker, BrokerClient* bc) { #ifdef WOLFMQTT_STATIC_MEMORY @@ -1652,10 +2459,14 @@ static void BrokerSubs_OrphanClient(MqttBroker* broker, BrokerClient* bc) #endif int count = 0; + /* First pass: count matching subs without yet detaching them. We + * must not flip cur->client to NULL until we know the orphan + * carrier was successfully created - otherwise an allocation + * failure inside BrokerOrphan_Take would leave dangling subs in + * broker->subs with no carrier and no way to be reclaimed. */ #ifdef WOLFMQTT_STATIC_MEMORY for (i = 0; i < BROKER_MAX_SUBS; i++) { if (broker->subs[i].in_use && broker->subs[i].client == bc) { - broker->subs[i].client = NULL; count++; } } @@ -1663,16 +2474,53 @@ static void BrokerSubs_OrphanClient(MqttBroker* broker, BrokerClient* bc) cur = broker->subs; while (cur) { if (cur->client == bc) { - cur->client = NULL; count++; } cur = cur->next; } #endif - if (count > 0) { - WBLOG_INFO(broker, "broker: orphaned %d subs for client_id=%s (session persist)", - count, BROKER_STR_VALID(bc->client_id) ? bc->client_id : "(null)"); + if (count == 0) { + return; + } + +#ifndef WOLFMQTT_STATIC_MEMORY + /* Stage a persistent-session record in broker->orphan_sessions. 
+ * Carries the out_q ownership across the upcoming + * BrokerClient_Free so messages published while disconnected + * can still be queued for the eventual reconnect. If Take fails + * (OOM), fall through to clean removal so subs are not left + * dangling without a carrier. */ + if (BrokerOrphan_Take(broker, bc) == NULL) { + WBLOG_ERR(broker, + "broker: orphan take failed client_id=%s - removing %d subs", + BROKER_STR_VALID(bc->client_id) ? bc->client_id : "(null)", + count); + BrokerSubs_RemoveClient(broker, bc); + return; + } +#endif + + /* Second pass: detach. Safe to mutate now - the carrier exists + * (dynamic mode) or the broker decided not to persist (static + * mode); either way, count > 0 reached this point. */ +#ifdef WOLFMQTT_STATIC_MEMORY + for (i = 0; i < BROKER_MAX_SUBS; i++) { + if (broker->subs[i].in_use && broker->subs[i].client == bc) { + broker->subs[i].client = NULL; + } + } +#else + cur = broker->subs; + while (cur) { + if (cur->client == bc) { + cur->client = NULL; + } + cur = cur->next; } +#endif + WBLOG_INFO(broker, + "broker: orphaned %d subs for client_id=%s (session persist)", + count, BROKER_STR_VALID(bc->client_id) ? bc->client_id : "(null)"); } static void BrokerSubs_RemoveClient(MqttBroker* broker, BrokerClient* bc) @@ -1714,6 +2562,21 @@ static void BrokerSubs_RemoveClient(MqttBroker* broker, BrokerClient* bc) cur = next; } #endif + +#ifdef WOLFMQTT_BROKER_PERSIST + /* Clean-session disconnect drops the persistent record. For + * non-clean disconnects the broker uses BrokerSubs_OrphanClient + * instead (subs stay in memory, persist record stays intact). + * Guard on bc->clean_session so paths that reach this function + * for a clean_session=0 client (takeover, socket-error teardown, + * shutdown sweep) do not silently wipe a persistent record that + * the orphan path is meant to preserve. 
*/ + if (bc != NULL && bc->clean_session && + BROKER_STR_VALID(bc->client_id)) { + (void)BrokerPersist_DelSubs(broker, bc->client_id); + (void)BrokerPersist_DelSession(broker, bc->client_id); + } +#endif } static int BrokerSubs_Add(MqttBroker* broker, BrokerClient* bc, @@ -2191,6 +3054,9 @@ static int BrokerRetained_Store(MqttBroker* broker, const char* topic, WBLOG_DBG(broker, "broker: retained store topic=%s len=%u qos=%d " "expiry=%u", topic, (unsigned)payload_len, (int)qos, (unsigned)expiry_sec); +#ifdef WOLFMQTT_BROKER_PERSIST + (void)BrokerPersist_PutRetained(broker, msg); +#endif } return rc; } @@ -2203,6 +3069,7 @@ static void BrokerRetained_Delete(MqttBroker* broker, const char* topic) BrokerRetainedMsg* cur; BrokerRetainedMsg* prev = NULL; #endif + int found = 0; if (broker == NULL || topic == NULL) { return; @@ -2213,7 +3080,8 @@ static void BrokerRetained_Delete(MqttBroker* broker, const char* topic) XSTRCMP(broker->retained[i].topic, topic) == 0) { WBLOG_DBG(broker, "broker: retained delete topic=%s", topic); XMEMSET(&broker->retained[i], 0, sizeof(BrokerRetainedMsg)); - return; + found = 1; + break; } } #else @@ -2233,12 +3101,21 @@ static void BrokerRetained_Delete(MqttBroker* broker, const char* topic) WOLFMQTT_FREE(cur->payload); } WOLFMQTT_FREE(cur); - return; + found = 1; + break; } prev = cur; cur = next; } #endif + +#ifdef WOLFMQTT_BROKER_PERSIST + if (found) { + (void)BrokerPersist_DelRetained(broker, topic); + } +#else + (void)found; +#endif } static void BrokerRetained_FreeAll(MqttBroker* broker) @@ -3121,6 +3998,59 @@ static int BrokerHandle_Connect(BrokerClient* bc, int rx_len, bc->keep_alive_sec = mc.keep_alive_sec; bc->last_rx = WOLFMQTT_BROKER_GET_TIME_S(); +#ifndef WOLFMQTT_STATIC_MEMORY + /* Default Session Expiry. 
Set BEFORE the v5 property parse below + * so that a v5 client carrying MQTT_PROP_SESSION_EXPIRY_INTERVAL + * overrides this default rather than being silently clobbered: + * - v3.1.1 persistent (clean_session=0): 0xFFFFFFFF (server + * policy decides eviction; MQTT 3.1.1 sec 3.1.2.4). + * - clean_session=1 or v5 client without the property: 0 + * (expire on disconnect, per MQTT v5 sec 3.1.2.11.2). */ + if (!mc.clean_session) { + bc->session_expiry_sec = 0xFFFFFFFFu; + } + else { + bc->session_expiry_sec = 0; + } +#endif + +#if defined(WOLFMQTT_V5) && !defined(WOLFMQTT_STATIC_MEMORY) + /* [MQTT-3.1.2.11.3] v5 Receive Maximum. If present and non-zero, the + * client is telling us not to exceed this many outbound QoS 1/2 + * PUBLISHes in flight to it. Absent property means 65535 (no + * client-imposed cap). 0 is a protocol error, but tolerate it as + * "unset" rather than disconnecting, to stay friendly to mildly + * non-conforming clients - the actual cap then comes from + * BROKER_MAX_INFLIGHT_PER_SUB alone. */ + if (mc.protocol_level >= MQTT_CONNECT_PROTOCOL_LEVEL_5 && + mc.props != NULL) { + MqttProp* rm_prop = BrokerProps_Find(mc.props, + MQTT_PROP_RECEIVE_MAX); + if (rm_prop != NULL && rm_prop->data_short > 0) { + bc->client_receive_max = rm_prop->data_short; + WBLOG_DBG(broker, + "broker: client Receive Maximum sock=%d value=%u", + (int)bc->sock, (unsigned)bc->client_receive_max); + } + /* [MQTT-3.1.2.11.2] v5 Session Expiry Interval. If present, + * carry it onto bc->session_expiry_sec so the disconnect + * path stamps it into the orphan record. Absent property + * means the default set above stands (0 for clean_session=1, + * 0xFFFFFFFF for clean_session=0 to honor v3.1.1 persistence + * semantics when a v5 client opts in without the property). 
*/ + { + MqttProp* se_prop = BrokerProps_Find(mc.props, + MQTT_PROP_SESSION_EXPIRY_INTERVAL); + if (se_prop != NULL) { + bc->session_expiry_sec = se_prop->data_int; + WBLOG_DBG(broker, + "broker: client Session Expiry sock=%d value=%u", + (int)bc->sock, (unsigned)bc->session_expiry_sec); + } + } + } +#endif + /* [MQTT-3.1.3-6] If we accepted a zero-length ClientId, assign a unique * server-generated one before the duplicate-check / session-resume block * below so the assigned ID flows through normal handling. v5 also echoes @@ -3178,6 +4108,7 @@ static int BrokerHandle_Connect(BrokerClient* bc, int rx_len, /* Client ID uniqueness and clean session handling */ bc->clean_session = mc.clean_session; + if (BROKER_STR_VALID(bc->client_id)) { BrokerClient* old; @@ -3222,7 +4153,27 @@ static int BrokerHandle_Connect(BrokerClient* bc, int rx_len, if (mc.clean_session) { /* Remove any remaining subs for this client_id */ BrokerSubs_RemoveByClientId(broker, bc->client_id); + #ifndef WOLFMQTT_STATIC_MEMORY + { + BrokerOrphanSession* o = + BrokerOrphan_Find(broker, bc->client_id); + if (o != NULL) { + BrokerOrphan_Remove(broker, o); + } + } + #endif } + #ifndef WOLFMQTT_STATIC_MEMORY + else { + /* Persistent session reconnect: pick up any queued messages + * left by the prior incarnation. Inherits session_expiry + * from the orphan when this CONNECT did not specify one. + * Drain is invoked further down, after CONNACK is sent. */ + if (BrokerOrphan_Reclaim(broker, bc)) { + session_present = 1; + } + } + #endif } /* Store Last Will and Testament */ @@ -3460,9 +4411,9 @@ static int BrokerHandle_Connect(BrokerClient* bc, int rx_len, } /* [MQTT-3.2.2.3.4] Maximum QoS property MUST be 0 or 1. Absence * of the property signals server supports Maximum QoS 2. Emitting - * Maximum QoS = 2 is a Protocol Error and strict v5 clients will - * disconnect on receipt. Emit the property only when this build - * caps below QoS 2 via WOLFMQTT_MAX_QOS. 
*/ + * Maximum QoS = 2 is a Protocol Error and strict v5 clients (e.g. + * mosquitto) will disconnect on receipt. Emit the property only + * when this build caps below QoS 2 via WOLFMQTT_MAX_QOS. */ #if WOLFMQTT_MAX_QOS < 2 prop = MqttProps_Add(&ack.props); if (prop != NULL) { @@ -3470,6 +4421,25 @@ static int BrokerHandle_Connect(BrokerClient* bc, int rx_len, prop->data_byte = (byte)WOLFMQTT_MAX_QOS; } #endif + + /* [MQTT-3.2.2.3.3] Receive Maximum. Advertise the broker's + * per-client inbound QoS 1/2 cap so well-behaved publishers can + * pace themselves. We accept up to BROKER_MAX_INBOUND_QOS2 + * concurrent QoS 2 PUBLISHes awaiting PUBREL ([MQTT-4.3.3]); use + * the same number as the wire value. The property MUST NOT be + * 0 - we skip the emission entirely in that (unreachable) case + * so a future tunable down to 0 cannot send an illegal value. + * The cap applies in both dynamic and static memory modes. */ + if (BROKER_MAX_INBOUND_QOS2 > 0) { + prop = MqttProps_Add(&ack.props); + if (prop != NULL) { + prop->type = MQTT_PROP_RECEIVE_MAX; + prop->data_short = + (BROKER_MAX_INBOUND_QOS2 > 0xFFFF) ? + (word16)0xFFFF : + (word16)BROKER_MAX_INBOUND_QOS2; + } + } } #endif @@ -3504,6 +4474,28 @@ static int BrokerHandle_Connect(BrokerClient* bc, int rx_len, if (ack.return_code != MQTT_CONNECT_ACK_CODE_ACCEPTED) { return 0; } + +#ifdef WOLFMQTT_BROKER_PERSIST + /* Successful CONNECT with clean_session=0 -> shadow-write the + * session record. Already-persisted sessions get overwritten with + * their current protocol_level / client_id, which is harmless. The + * persist layer no-ops when no hooks are installed. */ + if (!bc->clean_session && BROKER_STR_VALID(bc->client_id)) { + (void)BrokerPersist_PutSession(broker, bc); + } +#endif + +#ifndef WOLFMQTT_STATIC_MEMORY + /* If the reconnect inherited a non-empty queue from an orphan + * session, drain it now so the subscriber sees the queued + * messages on the heels of CONNACK. 
(BrokerOrphan_Reclaim already + * moved entries into bc->out_q above; drain dispatches up to the + * inflight cap as usual.) */ + if (bc->out_q_count > 0) { + BrokerClient_DrainOutQueue(bc); + } +#endif + return rc; } @@ -3598,6 +4590,15 @@ static int BrokerHandle_Subscribe(BrokerClient* bc, int rx_len, * avoid reading past the end of the return_codes array */ rc = BrokerSend_SubAck(bc, sub.packet_id, return_codes, i); +#ifdef WOLFMQTT_BROKER_PERSIST + /* Shadow-write the full subscription list for this client. Only + * meaningful for clean_session=0 sessions; the persist layer no-ops + * when no hooks are installed. */ + if (rc > 0 && !bc->clean_session && BROKER_STR_VALID(bc->client_id)) { + (void)BrokerPersist_PutSubs(broker, bc->client_id); + } +#endif + #ifdef WOLFMQTT_V5 if (sub.props) { (void)MqttProps_Free(sub.props); @@ -3667,6 +4668,13 @@ static int BrokerHandle_Unsubscribe(BrokerClient* bc, int rx_len, rc = MqttPacket_Write(&bc->client, bc->tx_buf, rc); } +#ifdef WOLFMQTT_BROKER_PERSIST + /* Re-snapshot subs (PutSubs converts count=0 into a DelSubs). */ + if (rc > 0 && !bc->clean_session && BROKER_STR_VALID(bc->client_id)) { + (void)BrokerPersist_PutSubs(broker, bc->client_id); + } +#endif + #ifdef WOLFMQTT_V5 if (unsub.props) { (void)MqttProps_Free(unsub.props); @@ -3848,54 +4856,114 @@ static int BrokerHandle_Publish(BrokerClient* bc, int rx_len, sub->client->protocol_level != 0 && BROKER_STR_VALID(sub->filter) && BrokerTopicMatch(sub->filter, topic)) { - int sub_rc; - MqttPublish out_pub; MqttQoS eff_qos; - XMEMSET(&out_pub, 0, sizeof(out_pub)); - out_pub.topic_name = topic; eff_qos = (pub.qos < sub->qos) ? pub.qos : sub->qos; - out_pub.qos = eff_qos; - if (eff_qos >= MQTT_QOS_1) { - out_pub.packet_id = BrokerNextPacketId(broker); +#ifdef WOLFMQTT_STATIC_MEMORY + /* Static-memory mode keeps the legacy synchronous + * fan-out: no per-subscriber queue, no inflight cap. + * Sub-encoder failure is logged but not propagated. 
*/ + { + int sub_rc; + MqttPublish out_pub; + XMEMSET(&out_pub, 0, sizeof(out_pub)); + out_pub.topic_name = topic; + out_pub.qos = eff_qos; + if (eff_qos >= MQTT_QOS_1) { + out_pub.packet_id = BrokerNextPacketId(broker); + } + out_pub.retain = 0; + out_pub.duplicate = 0; + out_pub.buffer = payload; + out_pub.total_len = pub.total_len; + #ifdef WOLFMQTT_V5 + out_pub.protocol_level = sub->client->protocol_level; + if (sub->client->protocol_level >= + MQTT_CONNECT_PROTOCOL_LEVEL_5) { + out_pub.props = pub.props; + } + #endif + sub_rc = MqttEncode_Publish(sub->client->tx_buf, + BROKER_CLIENT_TX_SZ(sub->client), &out_pub, 0); + if (sub_rc > 0) { + WBLOG_DBG(broker, + "broker: PUBLISH fwd sock=%d -> sock=%d " + "topic=%s qos=%d len=%u", + (int)bc->sock, (int)sub->client->sock, + topic, eff_qos, (unsigned)pub.total_len); + (void)MqttPacket_Write(&sub->client->client, + sub->client->tx_buf, sub_rc); + } + else { + WBLOG_ERR(broker, + "broker: PUBLISH fwd encode failed " + "sock=%d -> sock=%d rc=%d", + (int)bc->sock, (int)sub->client->sock, sub_rc); + } } - out_pub.retain = 0; - out_pub.duplicate = 0; - out_pub.buffer = payload; - out_pub.total_len = pub.total_len; -#ifdef WOLFMQTT_V5 - out_pub.protocol_level = sub->client->protocol_level; - if (sub->client->protocol_level >= - MQTT_CONNECT_PROTOCOL_LEVEL_5) { - out_pub.props = pub.props; +#else + /* Dynamic mode: enqueue a heap-owned copy on the + * subscriber's out_q, then drain. The queue gives us + * the inflight cap (#7 ordered delivery) and is the + * substrate for the offline queue in PR2. Invariant: + * MqttDecode_Publish above has populated pub.buffer + * with at least pub.total_len contiguous bytes (full + * PUBLISH is fully received and decoded before we + * reach the fan-out); BrokerOutPub_Alloc deep-copies + * pub.total_len from that buffer. 
*/ + { + BrokerOutPub* e = BrokerOutPub_Alloc(topic, payload, + pub.total_len); + if (e == NULL) { + WBLOG_ERR(broker, + "broker: PUBLISH fwd alloc failed sock=%d " + "-> sock=%d", (int)bc->sock, + (int)sub->client->sock); + } + else { + e->qos = eff_qos; + if (eff_qos >= MQTT_QOS_1) { + e->packet_id = BrokerNextPacketId(broker); + } + e->retain = 0; + e->state = BROKER_OUTQ_QUEUED; + #ifdef WOLFMQTT_V5 + e->protocol_level = sub->client->protocol_level; + #endif + BrokerClient_EnqueueOutPub(sub->client, e); + WBLOG_DBG(broker, + "broker: PUBLISH enq sock=%d -> sock=%d " + "topic=%s qos=%d len=%u", + (int)bc->sock, (int)sub->client->sock, + topic, eff_qos, (unsigned)pub.total_len); + BrokerClient_DrainOutQueue(sub->client); + } + } #endif - /* Use a per-subscriber rc: a subscriber's encode/write - * failure (e.g., undersized tx_buf) is a peer-side - * issue and must not be propagated up as the - * publisher's return code, or the publisher would be - * wrongly disconnected by the dispatch's fatal-rc - * gate (especially for QoS 0, where the function- - * level rc is otherwise never overwritten before - * return). */ - sub_rc = MqttEncode_Publish(sub->client->tx_buf, - BROKER_CLIENT_TX_SZ(sub->client), &out_pub, 0); - if (sub_rc > 0) { - WBLOG_DBG(broker, - "broker: PUBLISH fwd sock=%d -> sock=%d " - "topic=%s qos=%d len=%u", - (int)bc->sock, (int)sub->client->sock, - topic, eff_qos, (unsigned)pub.total_len); - (void)MqttPacket_Write(&sub->client->client, - sub->client->tx_buf, sub_rc); - } - else { - WBLOG_ERR(broker, - "broker: PUBLISH fwd encode failed sock=%d -> " - "sock=%d rc=%d", - (int)bc->sock, (int)sub->client->sock, sub_rc); - } } #ifndef WOLFMQTT_STATIC_MEMORY + /* Note on iteration model: static-mode walks the BrokerSub + * array via for (i = 0; i < N; i++); dynamic-mode walks the + * linked list via sub = sub->next below. The orphan + * branch (sub->client == NULL) only exists in dynamic mode - + * static-mode orphan handling lives in the restore path.
*/ + else if (sub->client == NULL && sub->client_id != NULL && + BROKER_STR_VALID(sub->filter) && + BrokerTopicMatch(sub->filter, topic)) { + /* Orphaned persistent session: subscriber is currently + * disconnected. Queue QoS 1/2 messages on the orphan + * slot for delivery on reconnect. */ + MqttQoS eff_qos = + (pub.qos < sub->qos) ? pub.qos : sub->qos; + if (eff_qos > MQTT_QOS_0) { + BrokerOrphanSession* o = + BrokerOrphan_Find(broker, sub->client_id); + if (o != NULL) { + BrokerOrphan_Enqueue(broker, o, topic, payload, + pub.total_len, eff_qos, 0); + } + } + } sub = sub->next; #endif } @@ -3993,6 +5061,15 @@ static int BrokerHandle_PublishRec(BrokerClient* bc, int rx_len) return rc; } +#ifndef WOLFMQTT_STATIC_MEMORY + /* Advance the out_q entry from PUBLISH_SENT to PUBREL_SENT. The + * PUBREL we send below is correlated to this entry; PUBCOMP from the + * subscriber will then close it out. A spurious PUBREC (no matching + * entry) still gets a PUBREL response for idempotency, just no + * queue state change. */ + (void)BrokerClient_OnPubRec(bc, resp.packet_id); +#endif + #ifdef WOLFMQTT_V5 if (resp.props) { (void)MqttProps_Free(resp.props); @@ -4180,7 +5257,27 @@ static int BrokerClient_Process(MqttBroker* broker, BrokerClient* bc) break; } case MQTT_PACKET_TYPE_PUBLISH_ACK: - /* QoS 1 ack from subscriber - delivery complete */ + /* QoS 1 ack from subscriber - delivery complete. In + * dynamic-memory mode, locate the matching out_q entry, + * unlink/free it, decrement inflight, and drain. 
*/ +#ifndef WOLFMQTT_STATIC_MEMORY + { + MqttPublishResp ack_resp; + XMEMSET(&ack_resp, 0, sizeof(ack_resp)); + #ifdef WOLFMQTT_V5 + ack_resp.protocol_level = bc->protocol_level; + #endif + if (MqttDecode_PublishResp(bc->rx_buf, rc, + MQTT_PACKET_TYPE_PUBLISH_ACK, &ack_resp) >= 0) { + BrokerClient_OnPubAck(bc, ack_resp.packet_id); + } + #ifdef WOLFMQTT_V5 + if (ack_resp.props) { + (void)MqttProps_Free(ack_resp.props); + } + #endif + } +#endif break; #if WOLFMQTT_MAX_QOS >= 2 case MQTT_PACKET_TYPE_PUBLISH_REC: @@ -4207,7 +5304,26 @@ static int BrokerClient_Process(MqttBroker* broker, BrokerClient* bc) } case MQTT_PACKET_TYPE_PUBLISH_COMP: /* QoS 2 step 4: subscriber sends PUBCOMP - delivery - * complete */ + * complete. Remove the matching out_q entry (state + * PUBREL_SENT), decrement inflight, drain. */ +#ifndef WOLFMQTT_STATIC_MEMORY + { + MqttPublishResp comp_resp; + XMEMSET(&comp_resp, 0, sizeof(comp_resp)); + #ifdef WOLFMQTT_V5 + comp_resp.protocol_level = bc->protocol_level; + #endif + if (MqttDecode_PublishResp(bc->rx_buf, rc, + MQTT_PACKET_TYPE_PUBLISH_COMP, &comp_resp) >= 0) { + BrokerClient_OnPubComp(bc, comp_resp.packet_id); + } + #ifdef WOLFMQTT_V5 + if (comp_resp.props) { + (void)MqttProps_Free(comp_resp.props); + } + #endif + } +#endif break; #endif /* WOLFMQTT_MAX_QOS >= 2 */ case MQTT_PACKET_TYPE_SUBSCRIBE: @@ -4520,6 +5636,13 @@ int MqttBroker_Start(MqttBroker* broker) return MQTT_CODE_ERROR_BAD_ARG; } +#ifdef WOLFMQTT_BROKER_PERSIST + /* Restore persisted state (orphan subs, retained messages) before + * opening the listen sockets so reconnecting clients see the + * resumed session immediately. No-op when no hooks are installed. 
*/ + (void)BrokerPersist_Restore(broker); +#endif + #ifdef ENABLE_MQTT_TLS /* Initialize TLS context if TLS is enabled */ if (broker->use_tls) { @@ -4711,6 +5834,9 @@ int MqttBroker_Free(MqttBroker* broker) /* Clean up pending wills and retained messages */ BrokerPendingWill_FreeAll(broker); BrokerRetained_FreeAll(broker); +#ifndef WOLFMQTT_STATIC_MEMORY + BrokerOrphan_FreeAll(broker); +#endif #ifdef ENABLE_MQTT_TLS if (broker->tls_ctx != NULL) { @@ -4746,6 +5872,18 @@ int MqttBroker_Free(MqttBroker* broker) } #endif +#if defined(WOLFMQTT_BROKER_PERSIST) && \ + defined(WOLFMQTT_BROKER_PERSIST_ENCRYPT) + /* Zero the cached AES key on teardown. ForceZero so the compiler + * cannot elide the wipe (plain XMEMSET on a value that becomes + * dead-on-return is at the compiler's discretion). */ + if (broker->persist_key_loaded) { + BROKER_FORCE_ZERO(broker->persist_key_cache, + sizeof(broker->persist_key_cache)); + broker->persist_key_loaded = 0; + } +#endif + return MQTT_CODE_SUCCESS; } @@ -4778,6 +5916,9 @@ static void BrokerUsage(const char* prog) #endif #ifdef ENABLE_MQTT_WEBSOCKET PRINTF(" -w WebSocket listen port (enables WebSocket)"); +#endif +#ifdef WOLFMQTT_BROKER_PERSIST + PRINTF(" -D Persistent storage directory (enables persistence)"); #endif PRINTF("Features:" #ifdef WOLFMQTT_BROKER_RETAINED @@ -4800,6 +5941,19 @@ static void BrokerUsage(const char* prog) #endif #ifdef ENABLE_MQTT_WEBSOCKET " websocket" +#endif +#ifdef WOLFMQTT_BROKER_PERSIST + " persist" +#endif +#ifdef WOLFMQTT_BROKER_PERSIST_ENCRYPT + " persist-encrypt" +#endif +#if defined(WOLFMQTT_BROKER_PERSIST_ENCRYPT) && \ + defined(WOLFMQTT_BROKER_PERSIST_ENCRYPT_DEV_KEY) + " persist-encrypt-dev-key" +#endif +#ifdef WOLFMQTT_STATIC_MEMORY + " static-memory" #endif ); } @@ -4815,12 +5969,53 @@ static void broker_signal_handler(int signo) } #endif +#if defined(WOLFMQTT_BROKER_PERSIST) && \ + defined(WOLFMQTT_BROKER_PERSIST_ENCRYPT) && \ + defined(WOLFMQTT_BROKER_PERSIST_ENCRYPT_DEV_KEY) +/* 
Development-only derive_key hook. Returns a fixed 32-byte key so the + * CLI can exercise the AES-GCM persistence round-trip without external + * key management. Real deployments override this via + * MqttBroker_SetPersistHooks before MqttBroker_Start. Compile-time + * gated so the fixed-pattern key generator is not linked into a + * production binary (where flipping a runtime flag would otherwise + * substitute trivially-recoverable keys for real ones). */ +static int wolfmqtt_broker_dev_derive_key(void* ctx, byte* out_key, + word32 key_len) +{ + word32 i; + (void)ctx; + if (out_key == NULL || key_len < 32) { + return MQTT_CODE_ERROR_BAD_ARG; + } + /* Fixed pattern. Operators must replace this with a real key + * derivation before relying on confidentiality. */ + for (i = 0; i < key_len; i++) { + out_key[i] = (byte)(0xA0 + (i & 0x0F)); + } + return 0; +} +#endif + int wolfmqtt_broker(int argc, char** argv) { int rc; MqttBroker broker; MqttBrokerNet net; int i; +#ifdef WOLFMQTT_BROKER_PERSIST + MqttBrokerPersistHooks persist_hooks; + const char* persist_dir = NULL; + int persist_initialized = 0; + #if defined(WOLFMQTT_BROKER_PERSIST_ENCRYPT) && \ + defined(WOLFMQTT_BROKER_PERSIST_ENCRYPT_DEV_KEY) + /* Encrypt-key source. NULL = unset (broker refuses to start when + * encrypt is enabled and persist_dir is given). "dev" = use the + * hard-coded dev key for CI/smoke tests. Only declared when the + * dev-key generator is compiled in - the -E option has no other + * recognized value. 
*/ + const char* encrypt_key_source = NULL; + #endif +#endif /* Set stdout to unbuffered for immediate output */ #ifndef WOLFMQTT_NO_STDIO @@ -4888,6 +6083,24 @@ int wolfmqtt_broker(int argc, char** argv) broker.ws_port = (word16)XATOI(argv[++i]); broker.use_websocket = 1; } +#endif +#ifdef WOLFMQTT_BROKER_PERSIST + else if (XSTRCMP(argv[i], "-D") == 0 && i + 1 < argc) { + persist_dir = argv[++i]; + } + #if defined(WOLFMQTT_BROKER_PERSIST_ENCRYPT) && \ + defined(WOLFMQTT_BROKER_PERSIST_ENCRYPT_DEV_KEY) + else if (XSTRCMP(argv[i], "-E") == 0 && i + 1 < argc) { + /* Encrypt key source. Only "dev" is recognized: install the + * hard-coded development key (NOT for production - the key + * is a fixed pattern in the binary, trivially recoverable). + * Production embedders should install their own derive_key + * hook via MqttBroker_SetPersistHooks and skip this CLI. + * The -E flag and the dev key generator are both compile- + * gated by WOLFMQTT_BROKER_PERSIST_ENCRYPT_DEV_KEY. */ + encrypt_key_source = argv[++i]; + } + #endif #endif else if (XSTRCMP(argv[i], "-h") == 0) { BrokerUsage(argv[0]); @@ -4899,6 +6112,69 @@ int wolfmqtt_broker(int argc, char** argv) } } +#ifdef WOLFMQTT_BROKER_PERSIST + /* If -D was passed, enable the default POSIX persistence backend + * rooted at that directory. Absent the flag, persist hooks remain + * uninstalled and the broker behaves like a build without + * WOLFMQTT_BROKER_PERSIST. */ + if (persist_dir != NULL) { + #ifdef WOLFMQTT_BROKER_PERSIST_ENCRYPT + #ifdef WOLFMQTT_BROKER_PERSIST_ENCRYPT_DEV_KEY + /* This build enables AES-GCM at rest. Refuse to start unless + * the operator explicitly opted in to a key source. The only + * built-in option from this CLI is "-E dev" (development key). + * Embedders providing real key management install derive_key + * via MqttBroker_SetPersistHooks and don't reach this code. 
*/ + if (encrypt_key_source == NULL) { + PRINTF("broker: ERROR persist+encrypt build needs -E " + "(only \"dev\" is recognized; production deployments " + "must install MqttBrokerPersistHooks.derive_key)"); + return MQTT_CODE_ERROR_BAD_ARG; + } + if (XSTRCMP(encrypt_key_source, "dev") != 0) { + PRINTF("broker: ERROR unknown -E source \"%s\" " + "(only \"dev\" is recognized)", encrypt_key_source); + return MQTT_CODE_ERROR_BAD_ARG; + } + #else + /* Encrypt is built in but the development key generator is not. + * The CLI cannot install a real derive_key on the operator's + * behalf - refuse explicitly so the failure mode is obvious. */ + PRINTF("broker: ERROR persist+encrypt build has no built-in key " + "source (rebuild with --enable-broker-persist-encrypt-dev-key " + "for testing, or install MqttBrokerPersistHooks.derive_key)"); + return MQTT_CODE_ERROR_BAD_ARG; + #endif + #endif + rc = MqttBrokerNet_PersistPosix_Init(&persist_hooks, persist_dir); + if (rc != 0) { + PRINTF("broker: persist init failed dir=%s rc=%d", + persist_dir, rc); + return rc; + } + persist_initialized = 1; + #if defined(WOLFMQTT_BROKER_PERSIST_ENCRYPT) && \ + defined(WOLFMQTT_BROKER_PERSIST_ENCRYPT_DEV_KEY) + /* Install the development-only derive_key hook. NOT for + * production - the key is a fixed pattern in the binary and is + * trivially recoverable by any adversary with read access. The + * "DEV-KEY" log line below makes the choice obvious. 
*/ + persist_hooks.derive_key = wolfmqtt_broker_dev_derive_key; + #endif + (void)MqttBroker_SetPersistHooks(&broker, &persist_hooks); + PRINTF("broker: persist enabled dir=%s%s", persist_dir, + #if defined(WOLFMQTT_BROKER_PERSIST_ENCRYPT) && \ + defined(WOLFMQTT_BROKER_PERSIST_ENCRYPT_DEV_KEY) + " (encrypted, DEV-KEY: NOT FOR PRODUCTION)" + #elif defined(WOLFMQTT_BROKER_PERSIST_ENCRYPT) + " (encrypted)" + #else + "" + #endif + ); + } +#endif + #if !defined(WOLFMQTT_WOLFIP) && !defined(WOLFMQTT_BROKER_CUSTOM_NET) && \ !defined(NO_MAIN_DRIVER) /* Reset shutdown flag so this wrapper is reusable across multiple @@ -4929,6 +6205,13 @@ int wolfmqtt_broker(int argc, char** argv) #endif MqttBroker_Free(&broker); + +#ifdef WOLFMQTT_BROKER_PERSIST + if (persist_initialized) { + MqttBrokerNet_PersistPosix_Free(&persist_hooks); + } +#endif + return rc; } diff --git a/src/mqtt_broker_persist.c b/src/mqtt_broker_persist.c new file mode 100644 index 00000000..b98dc92b --- /dev/null +++ b/src/mqtt_broker_persist.c @@ -0,0 +1,1953 @@ +/* mqtt_broker_persist.c + * + * Copyright (C) 2006-2026 wolfSSL Inc. + * + * This file is part of wolfMQTT. + * + * wolfMQTT is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfMQTT is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +/* Shadow-write helpers for the broker persistence layer. 
Every public
+ * helper here is callable from src/mqtt_broker.c at the corresponding
+ * state-change trigger point. Helpers no-op when broker->persist is
+ * NULL, so the call sites do not need to guard.
+ *
+ * Record format on the wire (each kv_put/kv_get blob):
+ *
+ *   off  size  field
+ *   0    4     magic      = "WMQB"
+ *   4    2     schema_ver = WOLFMQTT_BROKER_PERSIST_SCHEMA_VER (big endian)
+ *   6    1     rec_kind   = namespace echo
+ *   7    1     wrap_mode  = 0 plaintext, 1 AES-GCM
+ *   8    4     body_len   (big endian)
+ *   12   ...   body       (encoding depends on namespace and wrap_mode)
+ *
+ * The body encoding is intentionally simple: fixed-width header fields
+ * first, variable-length strings/payloads last, lengths prefixed
+ * big-endian. Forward compatibility is by schema-version bump + wipe.
+ * wrap_mode is bound as AAD by the AES-GCM path so a tamper that
+ * flips it (or flips rec_kind) fails the tag check.
+ */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include "wolfmqtt/mqtt_client.h"
+#include "wolfmqtt/mqtt_broker.h"
+
+#ifdef WOLFMQTT_BROKER_PERSIST
+
+/* The broker has private static structs we need to inspect (BrokerSub,
+ * BrokerRetainedMsg, BrokerOutPub). Pull them in by including the public
+ * header which exposes the typedefs. mqtt_broker.c defines static
+ * helpers we cannot reach from here, but we don't need them: the
+ * persist layer takes already-snapshot data via its function arguments. */
+
+/* Local mirror of the WBLOG_* macros from mqtt_broker.c. Both files
+ * are linked into the same broker binary; sharing a header for the
+ * macros would force every embedder of wolfmqtt to inherit them, so
+ * keep them file-local. */
+#ifdef WOLFMQTT_BROKER_NO_LOG
+    #define WMQB_LOG_ERR(b, ...)  do { (void)(b); } while(0)
+    #define WMQB_LOG_INFO(b, ...) do { (void)(b); } while(0)
+#else
+    #define WMQB_LOG(b, level, ...) \
+        do { if ((b)->log_level >= (level)) PRINTF(__VA_ARGS__); } while(0)
+    #define WMQB_LOG_ERR(b, ...)  WMQB_LOG(b, BROKER_LOG_ERROR, __VA_ARGS__)
+    #define WMQB_LOG_INFO(b, ...) WMQB_LOG(b, BROKER_LOG_INFO, __VA_ARGS__)
+#endif
+
+/* Time abstraction. Mirrors the fallback in src/mqtt_broker.c so this
+ * translation unit doesn't depend on a private header. Override via
+ * user_settings.h same as the broker core does. */
+#ifndef WOLFMQTT_BROKER_GET_TIME_S
+    #if defined(WOLFMQTT_WOLFIP)
+        #error "WOLFMQTT_WOLFIP requires WOLFMQTT_BROKER_GET_TIME_S to be defined"
+    #else
+        /* time() backs the default clock below. */
+        #include <time.h>
+        #define WOLFMQTT_BROKER_GET_TIME_S() \
+            ((WOLFMQTT_BROKER_TIME_T)time(NULL))
+    #endif
+#endif
+
+#ifdef WOLFMQTT_BROKER_PERSIST_ENCRYPT
+    /* Aes / wc_AesGcm* and WC_RNG / wc_RNG_GenerateBlock respectively. */
+    #include <wolfssl/wolfcrypt/aes.h>
+    #include <wolfssl/wolfcrypt/random.h>
+    #define WMQB_AES_KEY_LEN   32
+    #define WMQB_GCM_NONCE_LEN 12
+    #define WMQB_GCM_TAG_LEN   16
+#endif
+
+#define WMQB_HDR_LEN 12
+
+/* Build's expected wrap_mode (byte 7 of every record header). Toggling
+ * --enable-broker-persist-encrypt changes this value so a directory
+ * written by the other build is rejected via the schema-mismatch wipe
+ * path on next startup. */
+#ifdef WOLFMQTT_BROKER_PERSIST_ENCRYPT
+    #define WMQB_WRAP_MODE WOLFMQTT_BROKER_PERSIST_WRAP_AES_GCM
+#else
+    #define WMQB_WRAP_MODE WOLFMQTT_BROKER_PERSIST_WRAP_PLAIN
+#endif
+
+/* Big-endian numeric writers (no host-byte-order dependency in the
+ * stored bytes, so a record written on x86 can be read on any platform).
*/
+static WC_INLINE void wmqb_w_u16(byte* p, word16 v)
+{
+    /* Low byte lands in the second slot, high byte in the first. */
+    p[1] = (byte)(v & 0xFF);
+    p[0] = (byte)((v >> 8) & 0xFF);
+}
+static WC_INLINE void wmqb_w_u32(byte* p, word32 v)
+{
+    int pos;
+    /* Fill from the last byte backwards, peeling off 8 bits per step. */
+    for (pos = 3; pos >= 0; pos--) {
+        p[pos] = (byte)(v & 0xFF);
+        v >>= 8;
+    }
+}
+static WC_INLINE void wmqb_w_u64(byte* p, word64 v)
+{
+    int pos;
+    for (pos = 7; pos >= 0; pos--) {
+        p[pos] = (byte)(v & 0xFF);
+        v >>= 8;
+    }
+}
+static WC_INLINE word16 wmqb_r_u16(const byte* p)
+{
+    word16 hi = (word16)p[0];
+    word16 lo = (word16)p[1];
+    return (word16)((hi << 8) | lo);
+}
+static WC_INLINE word32 wmqb_r_u32(const byte* p)
+{
+    word32 acc = 0;
+    int pos;
+    /* Accumulate most-significant byte first. */
+    for (pos = 0; pos < 4; pos++) {
+        acc = (acc << 8) | (word32)p[pos];
+    }
+    return acc;
+}
+static WC_INLINE word64 wmqb_r_u64(const byte* p)
+{
+    word64 acc = 0;
+    int pos;
+    for (pos = 0; pos < 8; pos++) {
+        acc = (acc << 8) | (word64)p[pos];
+    }
+    return acc;
+}
+
+/* Emit the fixed 12-byte record header at buf: 4 magic bytes, the
+ * big-endian schema version, the record kind, this build's wrap mode
+ * and the big-endian body length. The caller guarantees buf has room
+ * for WMQB_HDR_LEN bytes. rec_kind is one of BROKER_PERSIST_NS_*; only
+ * its low byte is stored (values are all < 0x80 by design). */
+static void wmqb_write_header(byte* buf, word16 rec_kind, word32 body_len)
+{
+    byte* cursor = buf;
+
+    *cursor++ = WOLFMQTT_BROKER_PERSIST_MAGIC0;
+    *cursor++ = WOLFMQTT_BROKER_PERSIST_MAGIC1;
+    *cursor++ = WOLFMQTT_BROKER_PERSIST_MAGIC2;
+    *cursor++ = WOLFMQTT_BROKER_PERSIST_MAGIC3;
+    wmqb_w_u16(cursor, (word16)WOLFMQTT_BROKER_PERSIST_SCHEMA_VER);
+    cursor += 2;
+    *cursor++ = (byte)(rec_kind & 0xFF);
+    *cursor++ = (byte)WMQB_WRAP_MODE;
+    wmqb_w_u32(cursor, body_len);
+}
+
+/* Validate header against this build's schema. Returns 0 on match,
+ * negative on magic or version mismatch. Body length is returned via
+ * out_body_len.
*/
+static WC_INLINE int wmqb_read_header(const byte* buf, word32 buf_len,
+    word16 expect_kind, word32* out_body_len)
+{
+    word32 declared_len;
+
+    /* Too short to hold a header, or the magic is wrong: not one of our
+     * records. The length test comes first so buf[] is never read when
+     * the blob is shorter than the header. */
+    if (buf_len < WMQB_HDR_LEN ||
+        buf[0] != WOLFMQTT_BROKER_PERSIST_MAGIC0 ||
+        buf[1] != WOLFMQTT_BROKER_PERSIST_MAGIC1 ||
+        buf[2] != WOLFMQTT_BROKER_PERSIST_MAGIC2 ||
+        buf[3] != WOLFMQTT_BROKER_PERSIST_MAGIC3) {
+        return MQTT_CODE_ERROR_MALFORMED_DATA;
+    }
+
+    /* Well-formed record, but written under a different schema version,
+     * for a different namespace, or with the other wrap mode (plaintext
+     * vs. encrypted build). All three are reported as a schema mismatch
+     * so the caller wipes. */
+    if (wmqb_r_u16(&buf[4]) !=
+            (word16)WOLFMQTT_BROKER_PERSIST_SCHEMA_VER ||
+        buf[6] != (byte)(expect_kind & 0xFF) ||
+        buf[7] != (byte)WMQB_WRAP_MODE) {
+        return MQTT_CODE_ERROR_BAD_ARG;
+    }
+
+    /* The declared body must fit in what follows the header. The value
+     * is kept in a local so the bounds check runs even when the caller
+     * passes NULL for out_body_len (header-only validation). */
+    declared_len = wmqb_r_u32(&buf[8]);
+    if ((buf_len - WMQB_HDR_LEN) < declared_len) {
+        return MQTT_CODE_ERROR_MALFORMED_DATA;
+    }
+    if (out_body_len != NULL) {
+        *out_body_len = declared_len;
+    }
+    return 0;
+}
+
+#ifdef WOLFMQTT_BROKER_PERSIST_ENCRYPT
+/* Lazy-init key cache. Single-threaded broker - no lock needed. The
+ * application-provided derive_key hook fills 32 bytes on first request.
+ * The cache lives on the MqttBroker (broker->persist_key_cache /
+ * broker->persist_key_loaded) so multiple broker instances in one
+ * process don't share key material, and MqttBroker_Free can ForceZero
+ * the cached key on teardown.
*/
+static int wmqb_get_key(MqttBroker* broker)
+{
+    const MqttBrokerPersistHooks* hooks;
+    int rc = 0;
+
+    if (broker == NULL) {
+        rc = MQTT_CODE_ERROR_BAD_ARG;
+    }
+    else if (!broker->persist_key_loaded) {
+        /* First request: ask the application hook to fill the cache. */
+        hooks = broker->persist;
+        if (hooks == NULL || hooks->derive_key == NULL) {
+            rc = MQTT_CODE_ERROR_BAD_ARG;
+        }
+        else if (hooks->derive_key(hooks->ctx, broker->persist_key_cache,
+                (word32)sizeof(broker->persist_key_cache)) != 0) {
+            rc = MQTT_CODE_ERROR_SYSTEM;
+        }
+        else {
+            /* Only mark the cache valid once the hook reported success. */
+            broker->persist_key_loaded = 1;
+        }
+    }
+    return rc;
+}
+
+/* Wrap a plaintext record (header(12) + body) into an encrypted blob:
+ *   header(12) || nonce(12) || ct(body_len) || tag(16)
+ * Header is passed unencrypted but is bound as AAD so any tamper of
+ * the namespace / body_len fields fails the tag check. Caller must
+ * free the returned buffer.
+ *
+ * TODO(perf): each call here pays a fresh wc_AesInit + wc_AesGcmSetKey
+ * + wc_AesFree cycle. For bursty workloads (every PUBLISH on the orphan
+ * path triggers an encrypt), caching an Aes context on the broker and
+ * rekeying only when persist_key_cache changes would amortize the AES
+ * key schedule. Requires deciding the threading model first - today
+ * the broker is single-threaded so a single shared context is safe.
*/
+/* Parameters: plain/plain_len is the 12-byte header plus body; on
+ * success *ct_out receives a WOLFMQTT_MALLOC'd blob and *ct_out_len its
+ * length. On any failure both outputs are left untouched.
+ * Returns 0 on success, MQTT_CODE_ERROR_BAD_ARG for NULL / short /
+ * oversized input, MQTT_CODE_ERROR_MEMORY on allocation failure,
+ * MQTT_CODE_ERROR_SYSTEM on RNG or AES failure, or the wmqb_get_key
+ * error when no key is available. */
+static int wmqb_encrypt_blob(MqttBroker* broker,
+    const byte* plain, word32 plain_len, byte** ct_out, word32* ct_out_len)
+{
+    Aes aes;
+    WC_RNG rng;
+    byte* out;
+    byte* nonce;
+    word32 body_len;
+    word32 out_len;
+    int rc;
+
+    if (broker == NULL || plain == NULL || plain_len < WMQB_HDR_LEN ||
+        ct_out == NULL || ct_out_len == NULL) {
+        return MQTT_CODE_ERROR_BAD_ARG;
+    }
+    /* The wrapped blob is plain_len + nonce + tag bytes. Refuse input so
+     * large that the sum would wrap word32: a wrapped length would
+     * under-allocate `out` and the encrypt below would write past it. */
+    if (plain_len > (word32)0xFFFFFFFFU -
+            (word32)(WMQB_GCM_NONCE_LEN + WMQB_GCM_TAG_LEN)) {
+        return MQTT_CODE_ERROR_BAD_ARG;
+    }
+    rc = wmqb_get_key(broker);
+    if (rc != 0) {
+        return rc;
+    }
+
+    body_len = plain_len - WMQB_HDR_LEN;
+    out_len = plain_len + WMQB_GCM_NONCE_LEN + WMQB_GCM_TAG_LEN;
+    out = (byte*)WOLFMQTT_MALLOC(out_len);
+    if (out == NULL) {
+        return MQTT_CODE_ERROR_MEMORY;
+    }
+    /* Header travels in the clear; it is authenticated as AAD below. */
+    XMEMCPY(out, plain, WMQB_HDR_LEN);
+    nonce = out + WMQB_HDR_LEN;
+
+    /* Fresh random nonce per record, written straight into the blob. */
+    rc = MQTT_CODE_ERROR_SYSTEM;
+    if (wc_InitRng(&rng) == 0) {
+        if (wc_RNG_GenerateBlock(&rng, nonce, WMQB_GCM_NONCE_LEN) == 0) {
+            rc = 0;
+        }
+        wc_FreeRng(&rng);
+    }
+
+    if (rc == 0 && wc_AesInit(&aes, NULL, INVALID_DEVID) != 0) {
+        rc = MQTT_CODE_ERROR_SYSTEM;
+    }
+    else if (rc == 0) {
+        if (wc_AesGcmSetKey(&aes, broker->persist_key_cache,
+                (word32)sizeof(broker->persist_key_cache)) != 0 ||
+            wc_AesGcmEncrypt(&aes,
+                nonce + WMQB_GCM_NONCE_LEN,                /* ct */
+                plain + WMQB_HDR_LEN, body_len,            /* plaintext */
+                nonce, WMQB_GCM_NONCE_LEN,                 /* nonce */
+                nonce + WMQB_GCM_NONCE_LEN + body_len,     /* tag */
+                WMQB_GCM_TAG_LEN,
+                plain, WMQB_HDR_LEN) != 0) {               /* aad = header */
+            rc = MQTT_CODE_ERROR_SYSTEM;
+        }
+        wc_AesFree(&aes);
+    }
+
+    if (rc != 0) {
+        WOLFMQTT_FREE(out);
+        return rc;
+    }
+    /* Publish the outputs only once the blob is complete. */
+    *ct_out = out;
+    *ct_out_len = out_len;
+    return 0;
+}
+
+/* Reverse of wmqb_encrypt_blob. Caller must free the returned plain.
+ * TODO(perf): see wmqb_encrypt_blob - same per-call key-schedule cost.
*/
+static int wmqb_decrypt_blob(MqttBroker* broker,
+    const byte* ct, word32 ct_len, byte** plain_out, word32* plain_out_len)
+{
+    Aes aes;
+    byte* out;
+    const byte* nonce;
+    const byte* cipher;
+    const byte* tag;
+    word32 body_len;
+    int rc;
+
+    /* The blob must at least carry header + nonce + tag (empty body). */
+    if (broker == NULL || ct == NULL ||
+        ct_len < (word32)(WMQB_HDR_LEN + WMQB_GCM_NONCE_LEN +
+                          WMQB_GCM_TAG_LEN) ||
+        plain_out == NULL || plain_out_len == NULL) {
+        return MQTT_CODE_ERROR_BAD_ARG;
+    }
+    rc = wmqb_get_key(broker);
+    if (rc != 0) {
+        return rc;
+    }
+
+    /* Blob layout: header || nonce || ciphertext || tag. */
+    body_len = ct_len - WMQB_HDR_LEN - WMQB_GCM_NONCE_LEN -
+        WMQB_GCM_TAG_LEN;
+    nonce = ct + WMQB_HDR_LEN;
+    cipher = nonce + WMQB_GCM_NONCE_LEN;
+    tag = cipher + body_len;
+
+    *plain_out_len = WMQB_HDR_LEN + body_len;
+    out = (byte*)WOLFMQTT_MALLOC(*plain_out_len);
+    if (out == NULL) {
+        return MQTT_CODE_ERROR_MEMORY;
+    }
+    /* The header is stored in the clear; copy it through unchanged. */
+    XMEMCPY(out, ct, WMQB_HDR_LEN);
+
+    if (wc_AesInit(&aes, NULL, INVALID_DEVID) != 0) {
+        WOLFMQTT_FREE(out);
+        return MQTT_CODE_ERROR_SYSTEM;
+    }
+    if (wc_AesGcmSetKey(&aes, broker->persist_key_cache,
+            (word32)sizeof(broker->persist_key_cache)) != 0) {
+        rc = MQTT_CODE_ERROR_SYSTEM;
+    }
+    else if (wc_AesGcmDecrypt(&aes,
+            out + WMQB_HDR_LEN,
+            cipher, body_len,
+            nonce, WMQB_GCM_NONCE_LEN,
+            tag, WMQB_GCM_TAG_LEN,
+            ct, WMQB_HDR_LEN) != 0) {
+        /* Decrypt failure (the header is the AAD here) is reported as a
+         * malformed record. */
+        rc = MQTT_CODE_ERROR_MALFORMED_DATA;
+    }
+    wc_AesFree(&aes);
+
+    if (rc != 0) {
+        WOLFMQTT_FREE(out);
+        return rc;
+    }
+    *plain_out = out;
+    return 0;
+}
+#endif /* WOLFMQTT_BROKER_PERSIST_ENCRYPT */
+
+/* Commit a blob to the backend and sync if available. Returns the hook's
+ * return code, or 0 if hooks are disabled (silent no-op). When persist
+ * encryption is enabled, the blob is wrapped here so callers can keep
+ * passing plaintext (header + body).
*/
+static int wmqb_kv_put_commit(MqttBroker* broker, byte ns,
+    const byte* key, word16 key_len, const byte* blob, word32 blob_len)
+{
+    const MqttBrokerPersistHooks* hooks;
+    int rc;
+
+    /* No broker, no hooks, or no put hook: persistence is off. */
+    if (broker == NULL || broker->persist == NULL ||
+        broker->persist->kv_put == NULL) {
+        return 0;
+    }
+    hooks = broker->persist;
+
+#ifdef WOLFMQTT_BROKER_PERSIST_ENCRYPT
+    {
+        byte* wrapped;
+        word32 wrapped_len;
+        rc = wmqb_encrypt_blob(broker, blob, blob_len, &wrapped,
+            &wrapped_len);
+        if (rc == 0) {
+            /* Store the wrapped form; the temporary is ours to release. */
+            rc = hooks->kv_put(hooks->ctx, ns, key, key_len, wrapped,
+                wrapped_len);
+            WOLFMQTT_FREE(wrapped);
+        }
+    }
+#else
+    rc = hooks->kv_put(hooks->ctx, ns, key, key_len, blob, blob_len);
+#endif
+
+    /* Sync only after a successful write; its result is best-effort. */
+    if (rc == 0 && hooks->sync != NULL) {
+        (void)hooks->sync(hooks->ctx);
+    }
+    return rc;
+}
+
+/* Delete a record from the backend and sync if available. Returns the
+ * hook's return code, or 0 when hooks are disabled (silent no-op). */
+static int wmqb_kv_del_commit(MqttBroker* broker, byte ns,
+    const byte* key, word16 key_len)
+{
+    const MqttBrokerPersistHooks* hooks;
+    int rc;
+
+    if (broker == NULL || broker->persist == NULL ||
+        broker->persist->kv_del == NULL) {
+        return 0;
+    }
+    hooks = broker->persist;
+
+    rc = hooks->kv_del(hooks->ctx, ns, key, key_len);
+    if (rc == 0 && hooks->sync != NULL) {
+        (void)hooks->sync(hooks->ctx);
+    }
+    return rc;
+}
+
+#ifdef WOLFMQTT_BROKER_PERSIST_ENCRYPT
+/* iter callback wrapper: decrypts each blob before delegating to the
+ * "real" callback. Failures (bad tag) are logged via the iter cb's
+ * skipped counter convention - we forward 0-len which the inner cb
+ * treats as malformed. The persist context passed to the inner cb is
+ * augmented to carry the original cb pointer and ctx.
*/
+struct wmqb_iter_decrypt_ctx {
+    MqttBroker* broker;                 /* supplies the cached AES key */
+    MqttBrokerPersist_IterCb inner_cb;  /* caller's real callback */
+    void* inner_ctx;                    /* caller's callback context */
+};
+
+/* kv_iter trampoline: decrypt one blob and hand the plaintext to the
+ * wrapped callback. Returns whatever the wrapped callback returns. */
+static int wmqb_iter_decrypt_cb(const byte* key, word16 key_len,
+    const byte* blob, word32 blob_len, void* cb_ctx)
+{
+    struct wmqb_iter_decrypt_ctx* d =
+        (struct wmqb_iter_decrypt_ctx*)cb_ctx;
+    byte* plain;
+    word32 plain_len;
+    int rc;
+    int stop;
+
+    rc = wmqb_decrypt_blob(d->broker, blob, blob_len, &plain, &plain_len);
+    if (rc != 0) {
+        /* Forward an empty blob; inner cb will read_header-fail and
+         * bump its skipped counter. */
+        return d->inner_cb(key, key_len, blob, 0, d->inner_ctx);
+    }
+    stop = d->inner_cb(key, key_len, plain, plain_len, d->inner_ctx);
+    WOLFMQTT_FREE(plain);
+    return stop;
+}
+
+/* Drop-in replacement for h->kv_iter that decrypts each blob. */
+static int wmqb_iter_decrypt(MqttBroker* broker, byte ns,
+    MqttBrokerPersist_IterCb cb, void* cb_ctx)
+{
+    struct wmqb_iter_decrypt_ctx wrap;
+    const MqttBrokerPersistHooks* h;
+    if (broker == NULL || cb == NULL) {
+        return MQTT_CODE_ERROR_BAD_ARG;
+    }
+    h = broker->persist;
+    if (h == NULL || h->kv_iter == NULL) {
+        return MQTT_CODE_ERROR_BAD_ARG;
+    }
+    wrap.broker = broker;
+    wrap.inner_cb = cb;
+    wrap.inner_ctx = cb_ctx;
+    return h->kv_iter(h->ctx, ns, wmqb_iter_decrypt_cb, &wrap);
+}
+
+/* Decrypt-on-get for the META record specifically. The 256-byte stack
+ * buffer is sized for META's encrypted layout (12 hdr + 12 nonce + 4
+ * body + 16 tag = 44 bytes). Do NOT promote this to a generic helper -
+ * any namespace with a larger record will fail OUT_OF_BUFFER (or trip
+ * the kv_get backend's own truncation behavior). New callers should add
+ * a sized variant rather than widen this one.
+ *
+ * On entry *inout_len is the capacity of out; on success it is updated
+ * to the plaintext length (header + body). Returns 0 on success,
+ * MQTT_CODE_ERROR_BAD_ARG on NULL arguments or missing kv_get hook,
+ * MQTT_CODE_ERROR_OUT_OF_BUFFER when either buffer is too small, or the
+ * kv_get / decrypt error otherwise. */
+static int wmqb_kv_get_decrypt_meta(MqttBroker* broker, byte ns,
+    const byte* key, word16 key_len, byte* out, word32* inout_len)
+{
+    /* Read encrypted into a temp, decrypt, then copy plaintext into
+     * caller buffer. Caller's buffer should be at least
+     * (encrypted_len - nonce - tag). */
+    byte enc[256];
+    word32 cap = sizeof(enc);
+    byte* plain;
+    word32 plain_len;
+    int rc;
+    const MqttBrokerPersistHooks* h;
+
+    if (broker == NULL || out == NULL || inout_len == NULL) {
+        return MQTT_CODE_ERROR_BAD_ARG;
+    }
+    h = broker->persist;
+    if (h == NULL || h->kv_get == NULL) {
+        return MQTT_CODE_ERROR_BAD_ARG;
+    }
+    /* Sizing note (not enforced at compile time): WMQB_HDR_LEN(12) +
+     * nonce(12) + body + tag(16) must fit in enc[256]. With
+     * ns==BROKER_PERSIST_NS_META the plaintext body is 4 bytes ->
+     * 12+12+4+16 = 44 bytes. Plenty. If a new caller routes through
+     * this with a larger record, this limit needs revisiting. */
+    rc = h->kv_get(h->ctx, ns, key, key_len, enc, &cap);
+    if (rc != 0) {
+        return rc;
+    }
+    /* cap now comes from the backend hook. Never let the decryptor read
+     * more bytes than enc[] actually holds, even if the hook reported a
+     * larger (e.g. "required") length alongside a success code. */
+    if (cap > (word32)sizeof(enc)) {
+        return MQTT_CODE_ERROR_OUT_OF_BUFFER;
+    }
+    rc = wmqb_decrypt_blob(broker, enc, cap, &plain, &plain_len);
+    if (rc != 0) {
+        return rc;
+    }
+    if (plain_len > *inout_len) {
+        WOLFMQTT_FREE(plain);
+        return MQTT_CODE_ERROR_OUT_OF_BUFFER;
+    }
+    XMEMCPY(out, plain, plain_len);
+    *inout_len = plain_len;
+    WOLFMQTT_FREE(plain);
+    return 0;
+}
+#endif /* WOLFMQTT_BROKER_PERSIST_ENCRYPT */
+
+/* Iter helper used by the restore code. When encryption is enabled,
+ * wraps the callback to decrypt each blob; otherwise calls kv_iter
+ * directly.
*/ +static int wmqb_kv_iter(MqttBroker* broker, byte ns, + MqttBrokerPersist_IterCb cb, void* cb_ctx) +{ + const MqttBrokerPersistHooks* h; + if (broker == NULL || cb == NULL) { + return MQTT_CODE_ERROR_BAD_ARG; + } + h = broker->persist; + if (h == NULL || h->kv_iter == NULL) { + return MQTT_CODE_ERROR_BAD_ARG; + } +#ifdef WOLFMQTT_BROKER_PERSIST_ENCRYPT + return wmqb_iter_decrypt(broker, ns, cb, cb_ctx); +#else + return h->kv_iter(h->ctx, ns, cb, cb_ctx); +#endif +} + +/* -------------------------------------------------------------------------- */ +/* Public API */ +/* -------------------------------------------------------------------------- */ +int MqttBroker_SetPersistHooks(MqttBroker* broker, + const MqttBrokerPersistHooks* hooks) +{ + if (broker == NULL) { + return MQTT_CODE_ERROR_BAD_ARG; + } + broker->persist = hooks; + return MQTT_CODE_SUCCESS; +} + +/* -------------------------------------------------------------------------- */ +/* Shadow-write helpers (PutSession / PutSubs / PutRetained / PutOutPub / + * Del* / Restore). Each helper: + * 1) bails immediately if broker->persist is NULL, + * 2) snapshots the relevant in-memory state into a heap buffer, + * 3) commits via wmqb_kv_put_commit / wmqb_kv_del_commit. + * + * Restore is implemented in P6. */ +/* -------------------------------------------------------------------------- */ + +/* Snapshot a connected client into a persisted session record and commit. + * + * Body layout: + * off size field + * 0 1 protocol_level + * 1 1 _reserved (0) + * 2 4 session_expiry_sec (big endian; 0xFFFFFFFF = never) + * 6 2 client_id_len (big endian) + * 8 N client_id (no NUL terminator on the wire) + * + * Key is the client_id bytes. 
*/ +int BrokerPersist_PutSession(MqttBroker* broker, + const struct BrokerClient* bc) +{ + const BrokerClient* c = (const BrokerClient*)bc; + const char* cid; + word16 cid_len; + word32 body_len; + word32 total_len; + byte* buf; + int rc; + + if (broker == NULL || broker->persist == NULL || c == NULL) { + return 0; + } + /* Only persist sessions whose owner had a non-empty client_id and + * connected with clean_session=0 (the spec's persistent-session + * marker). Callers that want to evict use BrokerPersist_DelSession. */ + cid = c->client_id; + if (cid == NULL || *cid == '\0') { + return 0; + } + cid_len = (word16)XSTRLEN(cid); + + body_len = 1 + 1 + 4 + 2 + cid_len; + total_len = WMQB_HDR_LEN + body_len; + buf = (byte*)WOLFMQTT_MALLOC(total_len); + if (buf == NULL) { + return MQTT_CODE_ERROR_MEMORY; + } + wmqb_write_header(buf, BROKER_PERSIST_NS_SESSION, body_len); + buf[WMQB_HDR_LEN + 0] = c->protocol_level; + buf[WMQB_HDR_LEN + 1] = 0; + /* Session Expiry plumbed from CONNECT (v5 property) or defaulted to + * 0xFFFFFFFF (never expire) for clean_session=0 v3.1.1 sessions. */ +#ifndef WOLFMQTT_STATIC_MEMORY + wmqb_w_u32(&buf[WMQB_HDR_LEN + 2], c->session_expiry_sec); +#else + wmqb_w_u32(&buf[WMQB_HDR_LEN + 2], 0xFFFFFFFFu); +#endif + wmqb_w_u16(&buf[WMQB_HDR_LEN + 6], cid_len); + XMEMCPY(&buf[WMQB_HDR_LEN + 8], cid, cid_len); + + rc = wmqb_kv_put_commit(broker, BROKER_PERSIST_NS_SESSION, + (const byte*)cid, cid_len, buf, total_len); + WOLFMQTT_FREE(buf); + return rc; +} + +int BrokerPersist_DelSession(MqttBroker* broker, const char* client_id) +{ + if (broker == NULL || broker->persist == NULL || client_id == NULL) { + return 0; + } + return wmqb_kv_del_commit(broker, BROKER_PERSIST_NS_SESSION, + (const byte*)client_id, (word16)XSTRLEN(client_id)); +} + +/* Snapshot every BrokerSub bound to client_id into a single per-client + * subscription record. 
+ * + * Body layout: + * off size field + * 0 2 count (big endian; 0 == "no subs", caller may DelSubs instead) + * 2 ... N entries, each: + * 1 qos + * 1 options (reserved for v5 NL/RAP/RH bits) + * 2 filter_len (big endian) + * N filter (no NUL) + */ +int BrokerPersist_PutSubs(MqttBroker* broker, const char* client_id) +{ + word16 cid_len; + word32 body_len; + word32 total_len; + byte* buf; + byte* p; + word16 count = 0; + int rc; +#ifdef WOLFMQTT_STATIC_MEMORY + int i; +#else + const BrokerSub* sub; +#endif + + if (broker == NULL || broker->persist == NULL || client_id == NULL || + *client_id == '\0') { + return 0; + } + { + size_t raw = XSTRLEN(client_id); + /* word16 downcast - reject lengths that wouldn't fit instead + * of silently truncating. MQTT v3.1.1 caps client_id at 23 + * bytes; v5 caps at 65535 (i.e., fits in word16). Anything + * longer is malformed input from a caller. */ + if (raw == 0 || raw > 0xFFFFu) { + return MQTT_CODE_ERROR_OUT_OF_BUFFER; + } + cid_len = (word16)raw; + } + + /* Pass 1: count + size */ + body_len = 2; +#ifdef WOLFMQTT_STATIC_MEMORY + for (i = 0; i < BROKER_MAX_SUBS; i++) { + const BrokerSub* s = &broker->subs[i]; + if (!s->in_use) { + continue; + } + if (XSTRCMP(s->client_id, client_id) != 0) { + continue; + } + count++; + body_len += 1 + 1 + 2 + (word32)XSTRLEN(s->filter); + } +#else + for (sub = broker->subs; sub != NULL; sub = sub->next) { + if (sub->client_id == NULL || + XSTRCMP(sub->client_id, client_id) != 0) { + continue; + } + if (sub->filter == NULL) { + continue; + } + count++; + body_len += 1 + 1 + 2 + (word32)XSTRLEN(sub->filter); + } +#endif + + if (count == 0) { + /* Caller did all the unsubscribes; remove the record entirely. 
*/ + return wmqb_kv_del_commit(broker, BROKER_PERSIST_NS_SUBS, + (const byte*)client_id, cid_len); + } + + total_len = WMQB_HDR_LEN + body_len; + buf = (byte*)WOLFMQTT_MALLOC(total_len); + if (buf == NULL) { + return MQTT_CODE_ERROR_MEMORY; + } + wmqb_write_header(buf, BROKER_PERSIST_NS_SUBS, body_len); + p = &buf[WMQB_HDR_LEN]; + wmqb_w_u16(p, count); p += 2; + +#ifdef WOLFMQTT_STATIC_MEMORY + for (i = 0; i < BROKER_MAX_SUBS; i++) { + const BrokerSub* s = &broker->subs[i]; + word16 flen; + if (!s->in_use) { + continue; + } + if (XSTRCMP(s->client_id, client_id) != 0) { + continue; + } + flen = (word16)XSTRLEN(s->filter); + *p++ = (byte)s->qos; + *p++ = 0; /* reserved */ + wmqb_w_u16(p, flen); p += 2; + XMEMCPY(p, s->filter, flen); p += flen; + } +#else + for (sub = broker->subs; sub != NULL; sub = sub->next) { + word16 flen; + if (sub->client_id == NULL || + XSTRCMP(sub->client_id, client_id) != 0 || + sub->filter == NULL) { + continue; + } + flen = (word16)XSTRLEN(sub->filter); + *p++ = (byte)sub->qos; + *p++ = 0; /* reserved */ + wmqb_w_u16(p, flen); p += 2; + XMEMCPY(p, sub->filter, flen); p += flen; + } +#endif + + rc = wmqb_kv_put_commit(broker, BROKER_PERSIST_NS_SUBS, + (const byte*)client_id, cid_len, buf, total_len); + WOLFMQTT_FREE(buf); + return rc; +} + +int BrokerPersist_DelSubs(MqttBroker* broker, const char* client_id) +{ + if (broker == NULL || broker->persist == NULL || client_id == NULL) { + return 0; + } + return wmqb_kv_del_commit(broker, BROKER_PERSIST_NS_SUBS, + (const byte*)client_id, (word16)XSTRLEN(client_id)); +} + +/* Snapshot a retained message into a persisted record. + * + * Body layout: + * off size field + * 0 1 qos + * 1 1 _reserved + * 2 8 store_time (big endian, seconds since epoch) + * 10 4 expiry_sec (big endian, 0 == no expiry) + * 14 2 topic_len (big endian) + * 16 N topic + * 16+N 4 payload_len (big endian) + * 20+N M payload + * + * Key is the topic bytes. 
*/ +int BrokerPersist_PutRetained(MqttBroker* broker, + const struct BrokerRetainedMsg* rm) +{ + const BrokerRetainedMsg* m = (const BrokerRetainedMsg*)rm; + const char* topic; + word16 topic_len; + word32 payload_len; + const byte* payload; + word32 body_len; + word32 total_len; + byte* buf; + byte* p; + int rc; + + if (broker == NULL || broker->persist == NULL || m == NULL) { + return 0; + } + topic = m->topic; + if (topic == NULL || *topic == '\0') { + return 0; + } + topic_len = (word16)XSTRLEN(topic); + payload_len = m->payload_len; + payload = m->payload; + + body_len = 1 + 1 + 8 + 4 + 2 + topic_len + 4 + payload_len; + total_len = WMQB_HDR_LEN + body_len; + buf = (byte*)WOLFMQTT_MALLOC(total_len); + if (buf == NULL) { + return MQTT_CODE_ERROR_MEMORY; + } + wmqb_write_header(buf, BROKER_PERSIST_NS_RETAINED, body_len); + p = &buf[WMQB_HDR_LEN]; + *p++ = (byte)m->qos; + *p++ = 0; + wmqb_w_u64(p, (word64)m->store_time); p += 8; + wmqb_w_u32(p, m->expiry_sec); p += 4; + wmqb_w_u16(p, topic_len); p += 2; + XMEMCPY(p, topic, topic_len); p += topic_len; + wmqb_w_u32(p, payload_len); p += 4; + if (payload_len > 0 && payload != NULL) { + XMEMCPY(p, payload, payload_len); + } + + rc = wmqb_kv_put_commit(broker, BROKER_PERSIST_NS_RETAINED, + (const byte*)topic, topic_len, buf, total_len); + WOLFMQTT_FREE(buf); + return rc; +} + +int BrokerPersist_DelRetained(MqttBroker* broker, const char* topic) +{ + if (broker == NULL || broker->persist == NULL || topic == NULL) { + return 0; + } + return wmqb_kv_del_commit(broker, BROKER_PERSIST_NS_RETAINED, + (const byte*)topic, (word16)XSTRLEN(topic)); +} + +/* OUTQ key encoding: client_id_bytes || 0x00 || packet_id_be(2 bytes). + * The trailing 0x00 separator + fixed-width packet_id keep the key + * deterministic for any client_id (which can itself contain arbitrary + * UTF-8). 0x00 is illegal in MQTT-valid client_ids + * ([MQTT-3.1.3-5] rejects null chars), so the separator is unambiguous. 
*/ +static int wmqb_outq_build_key(const char* client_id, word16 packet_id, + byte* out_key, word16 out_cap, word16* out_len) +{ + size_t cid_len; + if (client_id == NULL || out_key == NULL || out_len == NULL) { + return MQTT_CODE_ERROR_BAD_ARG; + } + cid_len = XSTRLEN(client_id); + if (cid_len + 1 + 2 > out_cap) { + return MQTT_CODE_ERROR_OUT_OF_BUFFER; + } + XMEMCPY(out_key, client_id, cid_len); + out_key[cid_len] = 0x00; + wmqb_w_u16(&out_key[cid_len + 1], packet_id); + *out_len = (word16)(cid_len + 3); + return 0; +} + +/* Snapshot a single outbound-queue entry into a persisted record. + * + * Body layout: + * off size field + * 0 1 state (BROKER_OUTQ_*) + * 1 1 qos + * 2 1 retain + * 3 1 protocol_level + * 4 2 _reserved + * 6 2 packet_id (redundant with key, simplifies decode) + * 8 8 enq_time + * 16 4 expiry_sec + * 20 2 topic_len + * 22 N topic + * 22+N 4 payload_len + * 26+N M payload */ +int BrokerPersist_PutOutPub(MqttBroker* broker, const char* client_id, + const struct BrokerOutPub* e) +{ +#ifdef WOLFMQTT_STATIC_MEMORY + /* BrokerOutPub is dynamic-memory only; static-memory builds keep + * synchronous fan-out and therefore have no outbound queue to + * persist. The function symbol still exists so static and dynamic + * builds share the same ABI. */ + (void)broker; (void)client_id; (void)e; + return 0; +#else + const BrokerOutPub* p_e = (const BrokerOutPub*)e; + word16 topic_len; + word32 payload_len; + word32 body_len; + word32 total_len; + byte* buf; + byte* bp; + byte key[256 + 3]; + word16 key_len; + int rc; + + if (broker == NULL || broker->persist == NULL || + client_id == NULL || p_e == NULL) { + return 0; + } + /* QoS 0 entries are not persisted per [MQTT-3.3.1-3]; if they + * leak in here, no-op. 
*/ + if (p_e->qos == MQTT_QOS_0) { + return 0; + } + if (p_e->topic == NULL) { + return MQTT_CODE_ERROR_BAD_ARG; + } + topic_len = (word16)XSTRLEN(p_e->topic); + payload_len = p_e->payload_len; + + rc = wmqb_outq_build_key(client_id, p_e->packet_id, key, sizeof(key), + &key_len); + if (rc != 0) { + return rc; + } + body_len = 1 + 1 + 1 + 1 + 2 + 2 + 8 + 4 + 2 + topic_len + 4 + + payload_len; + total_len = WMQB_HDR_LEN + body_len; + buf = (byte*)WOLFMQTT_MALLOC(total_len); + if (buf == NULL) { + return MQTT_CODE_ERROR_MEMORY; + } + wmqb_write_header(buf, BROKER_PERSIST_NS_OUTQ, body_len); + bp = &buf[WMQB_HDR_LEN]; + *bp++ = p_e->state; + *bp++ = (byte)p_e->qos; + *bp++ = p_e->retain; + *bp++ = p_e->protocol_level; + *bp++ = 0; *bp++ = 0; /* _reserved */ + wmqb_w_u16(bp, p_e->packet_id); bp += 2; + wmqb_w_u64(bp, (word64)p_e->enq_time); bp += 8; + wmqb_w_u32(bp, p_e->expiry_sec); bp += 4; + wmqb_w_u16(bp, topic_len); bp += 2; + XMEMCPY(bp, p_e->topic, topic_len); bp += topic_len; + wmqb_w_u32(bp, payload_len); bp += 4; + if (payload_len > 0 && p_e->payload != NULL) { + XMEMCPY(bp, p_e->payload, payload_len); + } + + rc = wmqb_kv_put_commit(broker, BROKER_PERSIST_NS_OUTQ, + key, key_len, buf, total_len); + WOLFMQTT_FREE(buf); + return rc; +#endif /* WOLFMQTT_STATIC_MEMORY */ +} + +int BrokerPersist_DelOutPub(MqttBroker* broker, const char* client_id, + word16 packet_id) +{ + byte key[256 + 3]; + word16 key_len; + int rc; + if (broker == NULL || broker->persist == NULL || client_id == NULL) { + return 0; + } + rc = wmqb_outq_build_key(client_id, packet_id, key, sizeof(key), + &key_len); + if (rc != 0) { + return rc; + } + return wmqb_kv_del_commit(broker, BROKER_PERSIST_NS_OUTQ, + key, key_len); +} + +#ifndef WOLFMQTT_STATIC_MEMORY +/* Key-collection list used by DelOutQueue and the schema-wipe iter. A + * single linked node of (key bytes, len) so iter callbacks can stash + * keys and the caller can del them after iteration finishes. 
*/ +struct wmqb_wipe_key { + word16 key_len; + byte* key; + struct wmqb_wipe_key* next; +}; + +/* DelOutQueue iterator context: matches against client_id prefix and + * collects keys to delete after iteration completes. */ +struct wmqb_delq_ctx { + const byte* cid; + word16 cid_len; + struct wmqb_wipe_key* head; +}; + +static int wmqb_delq_iter_cb(const byte* key, word16 key_len, + const byte* blob, word32 blob_len, void* cb_ctx) +{ + struct wmqb_delq_ctx* dq = (struct wmqb_delq_ctx*)cb_ctx; + struct wmqb_wipe_key* node; + (void)blob; (void)blob_len; + /* Match key prefix: cid bytes followed by 0x00. */ + if (key_len < (word16)(dq->cid_len + 1)) { + return 0; + } + if (XMEMCMP(key, dq->cid, dq->cid_len) != 0) { + return 0; + } + if (key[dq->cid_len] != 0x00) { + return 0; + } + node = (struct wmqb_wipe_key*)WOLFMQTT_MALLOC(sizeof(*node)); + if (node == NULL) { + return 1; + } + node->key = (byte*)WOLFMQTT_MALLOC(key_len); + if (node->key == NULL) { + WOLFMQTT_FREE(node); + return 1; + } + XMEMCPY(node->key, key, key_len); + node->key_len = key_len; + node->next = dq->head; + dq->head = node; + return 0; +} +#endif + +int BrokerPersist_DelOutQueue(MqttBroker* broker, const char* client_id) +{ + if (broker == NULL || broker->persist == NULL || client_id == NULL) { + return 0; + } +#ifdef WOLFMQTT_STATIC_MEMORY + /* Static-memory backends typically lack a key-prefix iterator; the + * orphan queue feature is dynamic-memory only in v1. 
*/ + (void)client_id; + return 0; +#else + { + const MqttBrokerPersistHooks* h = broker->persist; + struct wmqb_delq_ctx ctx; + struct wmqb_wipe_key* cur; + int deleted = 0; + if (h->kv_iter == NULL || h->kv_del == NULL) { + return 0; + } + XMEMSET(&ctx, 0, sizeof(ctx)); + ctx.cid = (const byte*)client_id; + ctx.cid_len = (word16)XSTRLEN(client_id); + /* DelOutQueue only needs keys, not blob bodies - the wipe-key + * iterator callback ignores blob bytes - so bypassing the + * decrypt wrapper here is safe and avoids unnecessary AES + * cycles. */ + (void)h->kv_iter(h->ctx, BROKER_PERSIST_NS_OUTQ, + wmqb_delq_iter_cb, &ctx); + cur = ctx.head; + while (cur != NULL) { + struct wmqb_wipe_key* next = cur->next; + if (h->kv_del(h->ctx, BROKER_PERSIST_NS_OUTQ, cur->key, + cur->key_len) == 0) { + deleted++; + } + WOLFMQTT_FREE(cur->key); + WOLFMQTT_FREE(cur); + cur = next; + } + if (h->sync != NULL) { + (void)h->sync(h->ctx); + } + return deleted; + } +#endif +} + +/* META record. Body = 4-byte big-endian schema_ver (redundant with the + * header check, but lets a stand-alone tool inspect the file without + * knowing the broker's header format). Key is the single zero byte. */ +static int wmqb_meta_check(MqttBroker* broker, int* out_present) +{ + const MqttBrokerPersistHooks* h = broker->persist; + const byte meta_key = 0; + byte buf[WMQB_HDR_LEN + 4]; + word32 cap = sizeof(buf); + int rc; + word32 body_len = 0; + + if (out_present != NULL) { + *out_present = 0; + } + if (h->kv_get == NULL) { + return MQTT_CODE_ERROR_BAD_ARG; + } +#ifdef WOLFMQTT_BROKER_PERSIST_ENCRYPT + rc = wmqb_kv_get_decrypt_meta(broker, BROKER_PERSIST_NS_META, + &meta_key, 1, buf, &cap); +#else + rc = h->kv_get(h->ctx, BROKER_PERSIST_NS_META, &meta_key, 1, buf, &cap); +#endif + if (rc == MQTT_CODE_ERROR_NOT_FOUND) { + /* Genuine "first run" - the META record simply does not exist. + * meta_present stays 0; caller stamps a fresh META. 
*/ + return 0; + } + if (rc == MQTT_CODE_ERROR_OUT_OF_BUFFER) { + /* Schema mismatch on the encrypted path: an encrypted META read + * by a plaintext build (or vice versa) trips the size / decrypt + * check. Surface as MALFORMED_DATA so the caller's wipe-and- + * restart branch fires. */ + return MQTT_CODE_ERROR_MALFORMED_DATA; + } + if (rc != 0) { + /* Real backend failure (I/O error, permission denied, etc.). + * Surface to the caller so the broker refuses to start rather + * than silently masking the failure as "first run" and + * restamping META over state that may still be present once + * the backend recovers. */ + return rc; + } + if (cap < WMQB_HDR_LEN + 4) { + return MQTT_CODE_ERROR_MALFORMED_DATA; + } + if (out_present != NULL) { + *out_present = 1; + } + return wmqb_read_header(buf, cap, BROKER_PERSIST_NS_META, &body_len); +} + +/* Write the current META record so subsequent runs detect schema match. */ +static int wmqb_meta_write(MqttBroker* broker) +{ + const byte meta_key = 0; + byte buf[WMQB_HDR_LEN + 4]; + wmqb_write_header(buf, BROKER_PERSIST_NS_META, 4); + wmqb_w_u32(&buf[WMQB_HDR_LEN], WOLFMQTT_BROKER_PERSIST_SCHEMA_VER); + return wmqb_kv_put_commit(broker, BROKER_PERSIST_NS_META, + &meta_key, 1, buf, sizeof(buf)); +} + +/* Restore iterator context. Used for retained-msg, subs, session, + * and OUTQ callbacks. */ +struct wmqb_restore_ctx { + MqttBroker* broker; + int loaded; + int skipped; +}; + +#ifndef WOLFMQTT_STATIC_MEMORY +/* Create an orphan slot from a NS_SESSION record. Does NOT call the + * shadow-write Put hook (would be circular). Returns the new orphan + * or NULL on failure. 
*/ +static BrokerOrphanSession* wmqb_restore_create_orphan(MqttBroker* broker, + const byte* client_id, word16 cid_len, byte protocol_level, + word32 session_expiry_sec) +{ + BrokerOrphanSession* o; + if (broker == NULL || client_id == NULL || cid_len == 0) { + return NULL; + } + if (broker->orphan_session_count >= BROKER_MAX_PERSIST_SESSIONS) { + /* Cap is enforced when records are written, so the persisted + * set should fit. If it does not (e.g., macro reduced between + * runs), the oldest restored sessions get skipped. */ + return NULL; + } + o = (BrokerOrphanSession*)WOLFMQTT_MALLOC(sizeof(*o)); + if (o == NULL) { + return NULL; + } + XMEMSET(o, 0, sizeof(*o)); + o->client_id = (char*)WOLFMQTT_MALLOC((size_t)cid_len + 1); + if (o->client_id == NULL) { + WOLFMQTT_FREE(o); + return NULL; + } + XMEMCPY(o->client_id, client_id, cid_len); + o->client_id[cid_len] = '\0'; + o->protocol_level = protocol_level; + o->session_expiry_sec = session_expiry_sec; + o->orphan_since = WOLFMQTT_BROKER_GET_TIME_S(); + o->next = broker->orphan_sessions; + broker->orphan_sessions = o; + broker->orphan_session_count++; + return o; +} + +/* Locate an existing orphan by client_id (linear scan; pool is small + * by design). NULL if none. */ +static BrokerOrphanSession* wmqb_restore_find_orphan(MqttBroker* broker, + const byte* client_id, word16 cid_len) +{ + BrokerOrphanSession* cur; + if (broker == NULL || client_id == NULL) { + return NULL; + } + for (cur = broker->orphan_sessions; cur != NULL; cur = cur->next) { + if (cur->client_id == NULL) { + continue; + } + if (XSTRLEN(cur->client_id) == cid_len && + XMEMCMP(cur->client_id, client_id, cid_len) == 0) { + return cur; + } + } + return NULL; +} +#endif /* !WOLFMQTT_STATIC_MEMORY */ + +/* Allocate and insert a retained-message node from a decoded NS_RETAINED + * blob. Dynamic mode prepends a heap node onto broker->retained; static + * mode copies into the first free slot of broker->retained[]. 
Mirrors + * BrokerRetained_Store but without the already-exists merge logic + * (every key is fresh at startup). */ +static int wmqb_decode_and_insert_retained(MqttBroker* broker, + const byte* blob, word32 blob_len) +{ + word32 body_len = 0; + int rc; + const byte* p; + const byte* end; + word16 topic_len; + word32 payload_len; + word64 store_time; + word32 expiry; + byte qos; + + rc = wmqb_read_header(blob, blob_len, BROKER_PERSIST_NS_RETAINED, + &body_len); + if (rc != 0) { + return rc; + } + p = &blob[WMQB_HDR_LEN]; + end = p + body_len; + if ((word32)(end - p) < 1 + 1 + 8 + 4 + 2) { + return MQTT_CODE_ERROR_MALFORMED_DATA; + } + qos = *p++; + p++; /* _reserved */ + store_time = wmqb_r_u64(p); p += 8; + expiry = wmqb_r_u32(p); p += 4; + topic_len = wmqb_r_u16(p); p += 2; + if ((word32)(end - p) < (word32)topic_len + 4) { + return MQTT_CODE_ERROR_MALFORMED_DATA; + } + +#ifdef WOLFMQTT_STATIC_MEMORY + { + int i; + BrokerRetainedMsg* slot = NULL; + /* Use >= (not + 1 >) so a malformed topic_len == 0xFFFF on + * a word16 cannot wrap to 0 and bypass this check. 
*/ + if (topic_len >= BROKER_MAX_TOPIC_LEN) { + return MQTT_CODE_ERROR_OUT_OF_BUFFER; + } + for (i = 0; i < BROKER_MAX_RETAINED; i++) { + if (!broker->retained[i].in_use) { + slot = &broker->retained[i]; + break; + } + } + if (slot == NULL) { + return MQTT_CODE_ERROR_OUT_OF_BUFFER; + } + XMEMSET(slot, 0, sizeof(*slot)); + slot->in_use = 1; + XMEMCPY(slot->topic, p, topic_len); + slot->topic[topic_len] = '\0'; + p += topic_len; + payload_len = wmqb_r_u32(p); p += 4; + if ((word32)(end - p) < payload_len || + payload_len > BROKER_MAX_PAYLOAD_LEN) { + XMEMSET(slot, 0, sizeof(*slot)); + return MQTT_CODE_ERROR_MALFORMED_DATA; + } + if (payload_len > 0) { + XMEMCPY(slot->payload, p, payload_len); + } + slot->payload_len = payload_len; + slot->qos = (MqttQoS)qos; + slot->store_time = (WOLFMQTT_BROKER_TIME_T)store_time; + slot->expiry_sec = expiry; + } +#else + { + BrokerRetainedMsg* m; + m = (BrokerRetainedMsg*)WOLFMQTT_MALLOC(sizeof(*m)); + if (m == NULL) { + return MQTT_CODE_ERROR_MEMORY; + } + XMEMSET(m, 0, sizeof(*m)); + m->topic = (char*)WOLFMQTT_MALLOC((size_t)topic_len + 1); + if (m->topic == NULL) { + WOLFMQTT_FREE(m); + return MQTT_CODE_ERROR_MEMORY; + } + XMEMCPY(m->topic, p, topic_len); + m->topic[topic_len] = '\0'; + p += topic_len; + + payload_len = wmqb_r_u32(p); p += 4; + if ((word32)(end - p) < payload_len) { + WOLFMQTT_FREE(m->topic); + WOLFMQTT_FREE(m); + return MQTT_CODE_ERROR_MALFORMED_DATA; + } + if (payload_len > 0) { + m->payload = (byte*)WOLFMQTT_MALLOC(payload_len); + if (m->payload == NULL) { + WOLFMQTT_FREE(m->topic); + WOLFMQTT_FREE(m); + return MQTT_CODE_ERROR_MEMORY; + } + XMEMCPY(m->payload, p, payload_len); + } + m->payload_len = payload_len; + m->qos = (MqttQoS)qos; + m->store_time = (WOLFMQTT_BROKER_TIME_T)store_time; + m->expiry_sec = expiry; + m->next = broker->retained; + broker->retained = m; + } +#endif + return 0; +} + +static int wmqb_iter_retained_cb(const byte* key, word16 key_len, + const byte* blob, word32 blob_len, void* 
cb_ctx) +{ + struct wmqb_restore_ctx* c = (struct wmqb_restore_ctx*)cb_ctx; + int rc; + (void)key; (void)key_len; + rc = wmqb_decode_and_insert_retained(c->broker, blob, blob_len); + if (rc == 0) { + c->loaded++; + } + else { + c->skipped++; + } + return 0; /* always continue */ +} + +/* Allocate orphan subs from a decoded NS_SUBS blob. The blob key carries + * the client_id - subs created here have client=NULL, client_id set; + * the existing BrokerSubs_ReassociateClient path on reconnect rebinds + * them to the new BrokerClient. + * + * All-or-nothing: decode into a local working list (dynamic) or a + * tracked slot-index array (static) first, then commit on success. + * If any entry fails to decode or allocate, the partial work is rolled + * back so broker->subs (and slot.in_use flags) end up exactly as they + * were on entry. */ +static int wmqb_decode_and_insert_subs(MqttBroker* broker, + const byte* key, word16 key_len, const byte* blob, word32 blob_len) +{ + word32 body_len = 0; + int rc; + const byte* p; + const byte* end; + word16 count; + word16 i; +#ifdef WOLFMQTT_STATIC_MEMORY + /* Track slots we claimed in this call so we can release on failure. + * BROKER_MAX_SUBS bounds the working set; allocating on the stack + * keeps the failure path simple. 
*/ + int claimed[BROKER_MAX_SUBS]; + int claimed_count = 0; + int j; +#else + BrokerSub* local_head = NULL; + BrokerSub* local_tail = NULL; +#endif + + rc = wmqb_read_header(blob, blob_len, BROKER_PERSIST_NS_SUBS, + &body_len); + if (rc != 0) { + return rc; + } + if (key == NULL || key_len == 0) { + return MQTT_CODE_ERROR_BAD_ARG; + } + p = &blob[WMQB_HDR_LEN]; + end = p + body_len; + if ((word32)(end - p) < 2) { + return MQTT_CODE_ERROR_MALFORMED_DATA; + } + count = wmqb_r_u16(p); p += 2; + + for (i = 0; i < count; i++) { + byte qos; + word16 flen; + + if ((word32)(end - p) < 4) { + rc = MQTT_CODE_ERROR_MALFORMED_DATA; + goto rollback; + } + qos = *p++; + p++; /* options reserved */ + flen = wmqb_r_u16(p); p += 2; + if ((word32)(end - p) < flen) { + rc = MQTT_CODE_ERROR_MALFORMED_DATA; + goto rollback; + } + +#ifdef WOLFMQTT_STATIC_MEMORY + { + BrokerSub* slot = NULL; + int k; + /* >= (not + 1 >) so a malformed flen / key_len == 0xFFFF + * on a word16 cannot wrap to 0 and bypass this check. 
*/ + if (flen >= BROKER_MAX_FILTER_LEN || + key_len >= BROKER_MAX_CLIENT_ID_LEN) { + rc = MQTT_CODE_ERROR_OUT_OF_BUFFER; + goto rollback; + } + for (k = 0; k < BROKER_MAX_SUBS; k++) { + if (!broker->subs[k].in_use) { + slot = &broker->subs[k]; + claimed[claimed_count++] = k; + break; + } + } + if (slot == NULL) { + rc = MQTT_CODE_ERROR_OUT_OF_BUFFER; + goto rollback; + } + XMEMSET(slot, 0, sizeof(*slot)); + slot->in_use = 1; + XMEMCPY(slot->filter, p, flen); + slot->filter[flen] = '\0'; + XMEMCPY(slot->client_id, key, key_len); + slot->client_id[key_len] = '\0'; + slot->client = NULL; /* orphan until reconnect */ + slot->qos = (MqttQoS)qos; + } + p += flen; +#else + { + BrokerSub* sub; + char* cid; + sub = (BrokerSub*)WOLFMQTT_MALLOC(sizeof(*sub)); + if (sub == NULL) { + rc = MQTT_CODE_ERROR_MEMORY; + goto rollback; + } + XMEMSET(sub, 0, sizeof(*sub)); + sub->filter = (char*)WOLFMQTT_MALLOC((size_t)flen + 1); + if (sub->filter == NULL) { + WOLFMQTT_FREE(sub); + rc = MQTT_CODE_ERROR_MEMORY; + goto rollback; + } + XMEMCPY(sub->filter, p, flen); + sub->filter[flen] = '\0'; + p += flen; + + cid = (char*)WOLFMQTT_MALLOC((size_t)key_len + 1); + if (cid == NULL) { + WOLFMQTT_FREE(sub->filter); + WOLFMQTT_FREE(sub); + rc = MQTT_CODE_ERROR_MEMORY; + goto rollback; + } + XMEMCPY(cid, key, key_len); + cid[key_len] = '\0'; + sub->client_id = cid; + sub->client = NULL; /* orphan until reconnect */ + sub->qos = (MqttQoS)qos; + sub->next = NULL; + + /* Append to local list (preserve decode order so the + * eventual broker->subs walk sees the same order as a + * shadow-write would have produced). */ + if (local_tail == NULL) { + local_head = sub; + } + else { + local_tail->next = sub; + } + local_tail = sub; + } +#endif + } + + /* All entries decoded - splice into broker->subs. For dynamic mode, + * prepend the local list head-first to match the existing + * shadow-write order; tail->next is set to the prior head. 
*/ +#ifndef WOLFMQTT_STATIC_MEMORY + if (local_head != NULL) { + local_tail->next = broker->subs; + broker->subs = local_head; + } +#else + (void)j; +#endif + return 0; + +rollback: +#ifdef WOLFMQTT_STATIC_MEMORY + for (j = 0; j < claimed_count; j++) { + XMEMSET(&broker->subs[claimed[j]], 0, sizeof(BrokerSub)); + } +#else + while (local_head != NULL) { + BrokerSub* nxt = local_head->next; + if (local_head->filter != NULL) { + WOLFMQTT_FREE(local_head->filter); + } + if (local_head->client_id != NULL) { + WOLFMQTT_FREE(local_head->client_id); + } + WOLFMQTT_FREE(local_head); + local_head = nxt; + } +#endif + return rc; +} + +static int wmqb_iter_subs_cb(const byte* key, word16 key_len, + const byte* blob, word32 blob_len, void* cb_ctx) +{ + struct wmqb_restore_ctx* c = (struct wmqb_restore_ctx*)cb_ctx; + int rc; + rc = wmqb_decode_and_insert_subs(c->broker, key, key_len, blob, + blob_len); + if (rc == 0) { + c->loaded++; + } + else { + c->skipped++; + } + return 0; +} + +#ifndef WOLFMQTT_STATIC_MEMORY +/* Decode NS_SESSION record and create a matching orphan slot. 
*/ +static int wmqb_decode_and_insert_session(MqttBroker* broker, + const byte* blob, word32 blob_len) +{ + word32 body_len = 0; + int rc; + byte proto_level; + word32 session_expiry; + word16 cid_len; + const byte* p; + rc = wmqb_read_header(blob, blob_len, BROKER_PERSIST_NS_SESSION, + &body_len); + if (rc != 0) { + return rc; + } + if (body_len < 1 + 1 + 4 + 2) { + return MQTT_CODE_ERROR_MALFORMED_DATA; + } + p = &blob[WMQB_HDR_LEN]; + proto_level = p[0]; + /* p[1] reserved */ + session_expiry = wmqb_r_u32(&p[2]); + cid_len = wmqb_r_u16(&p[6]); + if (body_len < (word32)(8 + cid_len)) { + return MQTT_CODE_ERROR_MALFORMED_DATA; + } + if (wmqb_restore_create_orphan(broker, &p[8], cid_len, proto_level, + session_expiry) == NULL) { + return MQTT_CODE_ERROR_MEMORY; + } + return 0; +} + +static int wmqb_iter_session_cb(const byte* key, word16 key_len, + const byte* blob, word32 blob_len, void* cb_ctx) +{ + struct wmqb_restore_ctx* c = (struct wmqb_restore_ctx*)cb_ctx; + int rc; + (void)key; (void)key_len; + rc = wmqb_decode_and_insert_session(c->broker, blob, blob_len); + if (rc == 0) { + c->loaded++; + } + else { + c->skipped++; + } + return 0; +} + +/* Decode NS_OUTQ record and append to the matching orphan's queue. + * Insertion is sorted by enq_time so replay preserves publish order. 
*/ +static int wmqb_decode_and_insert_outq(MqttBroker* broker, + const byte* key, word16 key_len, const byte* blob, word32 blob_len) +{ + word32 body_len = 0; + int rc; + const byte* p; + const byte* end; + BrokerOrphanSession* o; + BrokerOutPub* e; + word16 cid_len; + word16 topic_len; + word32 payload_len; + byte state; + byte qos; + byte retain; + byte protocol_level; + word16 packet_id; + word64 enq_time; + word32 expiry_sec; + BrokerOutPub** prev_link; + BrokerOutPub* iter; + + if (key == NULL || key_len < 3) { + return MQTT_CODE_ERROR_BAD_ARG; + } + cid_len = key_len - 3; /* key = cid || 0x00 || pid_be(2) */ + if (key[cid_len] != 0x00) { + return MQTT_CODE_ERROR_MALFORMED_DATA; + } + rc = wmqb_read_header(blob, blob_len, BROKER_PERSIST_NS_OUTQ, + &body_len); + if (rc != 0) { + return rc; + } + if (body_len < 22) { + return MQTT_CODE_ERROR_MALFORMED_DATA; + } + p = &blob[WMQB_HDR_LEN]; + end = p + body_len; + state = *p++; + qos = *p++; + retain = *p++; + protocol_level = *p++; + p += 2; /* _reserved */ + packet_id = wmqb_r_u16(p); p += 2; + enq_time = wmqb_r_u64(p); p += 8; + expiry_sec = wmqb_r_u32(p); p += 4; + topic_len = wmqb_r_u16(p); p += 2; + if ((word32)(end - p) < (word32)topic_len + 4) { + return MQTT_CODE_ERROR_MALFORMED_DATA; + } + + o = wmqb_restore_find_orphan(broker, key, cid_len); + if (o == NULL) { + /* OUTQ record without a matching session - orphan record + * leakage. Skip but keep the on-disk record intact; a wipe + * pass would clean these up. 
*/ + return MQTT_CODE_ERROR_NOT_FOUND; + } + + e = (BrokerOutPub*)WOLFMQTT_MALLOC(sizeof(*e)); + if (e == NULL) { + return MQTT_CODE_ERROR_MEMORY; + } + XMEMSET(e, 0, sizeof(*e)); + e->topic = (char*)WOLFMQTT_MALLOC((size_t)topic_len + 1); + if (e->topic == NULL) { + WOLFMQTT_FREE(e); + return MQTT_CODE_ERROR_MEMORY; + } + XMEMCPY(e->topic, p, topic_len); + e->topic[topic_len] = '\0'; + p += topic_len; + + payload_len = wmqb_r_u32(p); p += 4; + if ((word32)(end - p) < payload_len) { + WOLFMQTT_FREE(e->topic); + WOLFMQTT_FREE(e); + return MQTT_CODE_ERROR_MALFORMED_DATA; + } + if (payload_len > 0) { + e->payload = (byte*)WOLFMQTT_MALLOC(payload_len); + if (e->payload == NULL) { + WOLFMQTT_FREE(e->topic); + WOLFMQTT_FREE(e); + return MQTT_CODE_ERROR_MEMORY; + } + XMEMCPY(e->payload, p, payload_len); + } + e->payload_len = payload_len; + e->qos = (MqttQoS)qos; + e->packet_id = packet_id; + e->retain = retain; + e->state = state; + e->enq_time = (WOLFMQTT_BROKER_TIME_T)enq_time; + e->expiry_sec = expiry_sec; + e->protocol_level = protocol_level; + + /* Insertion-sort by (enq_time, packet_id) so replay preserves + * publish order. Two messages enqueued within the same second + * (broker time granularity is 1s) tie-break on packet_id, which + * the broker hands out monotonically for the lifetime of a + * process. Across a restart packet_id resets but ordering is + * still preserved within each window since the saved enq_time + * advances between windows. 
*/ + prev_link = &o->out_q_head; + iter = o->out_q_head; + while (iter != NULL) { + if (iter->enq_time < e->enq_time) { + prev_link = &iter->next; + iter = iter->next; + continue; + } + if (iter->enq_time == e->enq_time && + iter->packet_id < e->packet_id) { + prev_link = &iter->next; + iter = iter->next; + continue; + } + break; + } + e->next = iter; + *prev_link = e; + if (iter == NULL) { + o->out_q_tail = e; + } + o->out_q_count++; + if (state == BROKER_OUTQ_PUBLISH_SENT || + state == BROKER_OUTQ_PUBREL_SENT) { + o->out_q_inflight++; + } + return 0; +} + +static int wmqb_iter_outq_cb(const byte* key, word16 key_len, + const byte* blob, word32 blob_len, void* cb_ctx) +{ + struct wmqb_restore_ctx* c = (struct wmqb_restore_ctx*)cb_ctx; + int rc; + rc = wmqb_decode_and_insert_outq(c->broker, key, key_len, blob, + blob_len); + if (rc == 0) { + c->loaded++; + } + else { + c->skipped++; + } + return 0; +} + +/* Sweep orphans whose v5 Session Expiry has elapsed. Cascades to subs + * and OUTQ records via the existing helpers. */ +static void wmqb_restore_expiry_sweep(MqttBroker* broker) +{ + BrokerOrphanSession* cur = broker->orphan_sessions; + WOLFMQTT_BROKER_TIME_T now = WOLFMQTT_BROKER_GET_TIME_S(); + while (cur != NULL) { + BrokerOrphanSession* next = cur->next; + /* Sign-safe elapsed-time check. The unsigned subtraction would + * wrap to a huge positive value if the wall clock has stepped + * backward since orphan_since was stamped (NTP step, RTC reset + * on embedded targets); guard with the > test so a backward + * jump never causes a spurious expiry. */ + if (cur->session_expiry_sec != 0xFFFFFFFFu && + cur->session_expiry_sec > 0 && + now >= cur->orphan_since && + (word64)(now - cur->orphan_since) >= + (word64)cur->session_expiry_sec) { + WMQB_LOG_INFO(broker, + "broker: persist expired session client_id=%s " + "(expiry=%us)", + cur->client_id == NULL ? 
"(null)" : cur->client_id, + (unsigned)cur->session_expiry_sec); + /* Shared teardown lives in mqtt_broker.c so the eviction + * path and this expiry-sweep path can't drift. Drops + * persisted records, the orphan's still-NULL-bound subs, + * and unlinks + frees the orphan slot. */ + BrokerOrphan_DropFull(broker, cur); + } + cur = next; + } +} +#endif /* !WOLFMQTT_STATIC_MEMORY */ + +/* -------------------------------------------------------------------------- */ +/* Schema-mismatch wipe */ +/* */ +/* Walks each namespace, collects every key into a heap-grown list, then */ +/* deletes them one by one. Two-pass to avoid mutating the backend during */ +/* iteration. Only attempts the wipe when the backend implements both */ +/* kv_iter and kv_del; otherwise logs a warning and returns - new records */ +/* will overwrite by key as they arrive. */ +/* -------------------------------------------------------------------------- */ +#ifndef WOLFMQTT_STATIC_MEMORY +/* wmqb_wipe_key struct defined above (shared with DelOutQueue). 
*/ + +struct wmqb_wipe_ctx { + struct wmqb_wipe_key* head; + int collected; + int alloc_failed; +}; + +static int wmqb_wipe_iter_cb(const byte* key, word16 key_len, + const byte* blob, word32 blob_len, void* cb_ctx) +{ + struct wmqb_wipe_ctx* wc = (struct wmqb_wipe_ctx*)cb_ctx; + struct wmqb_wipe_key* node; + (void)blob; (void)blob_len; + node = (struct wmqb_wipe_key*)WOLFMQTT_MALLOC(sizeof(*node)); + if (node == NULL) { + wc->alloc_failed = 1; + return 1; /* stop iteration; partial wipe is still useful */ + } + node->key = (byte*)WOLFMQTT_MALLOC(key_len); + if (node->key == NULL) { + WOLFMQTT_FREE(node); + wc->alloc_failed = 1; + return 1; + } + XMEMCPY(node->key, key, key_len); + node->key_len = key_len; + node->next = wc->head; + wc->head = node; + wc->collected++; + return 0; +} + +static int wmqb_wipe_ns(MqttBroker* broker, byte ns) +{ + const MqttBrokerPersistHooks* h = broker->persist; + struct wmqb_wipe_ctx wc; + struct wmqb_wipe_key* cur; + int deleted = 0; + + if (h->kv_iter == NULL || h->kv_del == NULL) { + return 0; + } + XMEMSET(&wc, 0, sizeof(wc)); + (void)h->kv_iter(h->ctx, ns, wmqb_wipe_iter_cb, &wc); + cur = wc.head; + while (cur != NULL) { + struct wmqb_wipe_key* next = cur->next; + if (h->kv_del(h->ctx, ns, cur->key, cur->key_len) == 0) { + deleted++; + } + WOLFMQTT_FREE(cur->key); + WOLFMQTT_FREE(cur); + cur = next; + } + if (h->sync != NULL) { + (void)h->sync(h->ctx); + } + return deleted; +} +#endif /* !WOLFMQTT_STATIC_MEMORY */ + +static int wmqb_wipe_all(MqttBroker* broker) +{ +#ifdef WOLFMQTT_STATIC_MEMORY + /* Static-memory builds typically pair with backends that lack a + * full kv_iter (e.g., raw flash). Skip the active wipe; new records + * overwrite by key as they arrive, and orphan files linger + * harmlessly. 
*/ + WMQB_LOG_INFO(broker, + "broker: persist wipe skipped (STATIC_MEMORY mode)"); + return 0; +#else + int total = 0; + if (broker == NULL || broker->persist == NULL) { + return 0; + } + total += wmqb_wipe_ns(broker, BROKER_PERSIST_NS_META); + total += wmqb_wipe_ns(broker, BROKER_PERSIST_NS_SESSION); + total += wmqb_wipe_ns(broker, BROKER_PERSIST_NS_SUBS); + total += wmqb_wipe_ns(broker, BROKER_PERSIST_NS_RETAINED); + total += wmqb_wipe_ns(broker, BROKER_PERSIST_NS_OUTQ); + WMQB_LOG_INFO(broker, "broker: persist wipe deleted=%d", total); + return total; +#endif +} + +/* Restore is intended to be called exactly once per process (from + * MqttBroker_Start). Calling it more than once will re-insert + * already-restored subs / retained / OUTQ entries because the splice + * paths below do not check for duplicates against current in-memory + * state. */ +int BrokerPersist_Restore(MqttBroker* broker) +{ + const MqttBrokerPersistHooks* h; + int rc; + int meta_present = 0; + struct wmqb_restore_ctx ctx; + + if (broker == NULL || broker->persist == NULL) { + return 0; + } + h = broker->persist; + + rc = wmqb_meta_check(broker, &meta_present); + if (rc == MQTT_CODE_ERROR_MALFORMED_DATA || + rc == MQTT_CODE_ERROR_BAD_ARG) { + /* Schema or magic mismatch. Wipe-and-restart per the chosen + * policy: iterate every namespace, delete every record, + * restamp META. New records get written fresh as activity + * resumes. (BAD_ARG = header magic / schema version mismatch + * from wmqb_read_header; MALFORMED_DATA = decrypt or size + * failure from wmqb_kv_get_decrypt_meta.) */ + WMQB_LOG_ERR(broker, + "broker: persist schema mismatch - wiping all records"); + (void)wmqb_wipe_all(broker); + return wmqb_meta_write(broker); + } + if (rc != 0) { + /* Real backend error (I/O failure, permission denied, etc.). + * Refuse to start so the operator sees the failure rather than + * proceeding as if persistence were healthy. 
*/ + WMQB_LOG_ERR(broker, + "broker: persist META read failed rc=%d - aborting restore", + rc); + return rc; + } + if (!meta_present) { + /* First run - no state to restore. Just stamp META. */ + return wmqb_meta_write(broker); + } + + XMEMSET(&ctx, 0, sizeof(ctx)); + ctx.broker = broker; +#ifndef WOLFMQTT_STATIC_MEMORY + /* Sessions first so subs and OUTQ entries can find their owner. */ + if (h->kv_iter != NULL) { + (void)wmqb_kv_iter(broker, BROKER_PERSIST_NS_SESSION, + wmqb_iter_session_cb, &ctx); + WMQB_LOG_INFO(broker, + "broker: persist restore sessions loaded=%d skipped=%d", + ctx.loaded, ctx.skipped); + ctx.loaded = 0; + ctx.skipped = 0; + } +#endif +#ifdef WOLFMQTT_BROKER_RETAINED + if (h->kv_iter != NULL) { + (void)wmqb_kv_iter(broker, BROKER_PERSIST_NS_RETAINED, + wmqb_iter_retained_cb, &ctx); + WMQB_LOG_INFO(broker, + "broker: persist restore retained loaded=%d skipped=%d", + ctx.loaded, ctx.skipped); + ctx.loaded = 0; + ctx.skipped = 0; + } +#endif + if (h->kv_iter != NULL) { + (void)wmqb_kv_iter(broker, BROKER_PERSIST_NS_SUBS, + wmqb_iter_subs_cb, &ctx); + WMQB_LOG_INFO(broker, + "broker: persist restore subs loaded=%d skipped=%d", + ctx.loaded, ctx.skipped); + ctx.loaded = 0; + ctx.skipped = 0; + } +#ifndef WOLFMQTT_STATIC_MEMORY + if (h->kv_iter != NULL) { + (void)wmqb_kv_iter(broker, BROKER_PERSIST_NS_OUTQ, + wmqb_iter_outq_cb, &ctx); + WMQB_LOG_INFO(broker, + "broker: persist restore outq loaded=%d skipped=%d", + ctx.loaded, ctx.skipped); + } + /* v5 Session Expiry sweep: drop any orphan whose session_expiry has + * elapsed since orphan_since was stamped. Cascades to its subs and + * persisted OUTQ records via the existing helpers. 
*/ + wmqb_restore_expiry_sweep(broker); +#endif + return 0; +} + +#endif /* WOLFMQTT_BROKER_PERSIST */ diff --git a/src/mqtt_broker_persist_posix.c b/src/mqtt_broker_persist_posix.c new file mode 100644 index 00000000..73dd8a28 --- /dev/null +++ b/src/mqtt_broker_persist_posix.c @@ -0,0 +1,527 @@ +/* mqtt_broker_persist_posix.c + * + * Copyright (C) 2006-2026 wolfSSL Inc. + * + * This file is part of wolfMQTT. + * + * wolfMQTT is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfMQTT is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +/* Default POSIX file-based persistence backend. + * + * Layout under : + * + * //.bin + * + * One file per record. Atomic update via write-tmp + fsync + rename + + * fsync directory. kv_iter walks the namespace directory, decodes hex + * filenames back to key bytes, and invokes the supplied callback with + * the full blob. + * + * Concurrency is not supported - a single broker process owns the + * tree. The directory is created on first init (with mode 0700 to keep + * persisted data accessible only to the broker user). */ + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include "wolfmqtt/mqtt_client.h" +#include "wolfmqtt/mqtt_broker.h" + +#ifdef WOLFMQTT_BROKER_PERSIST + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Context held by the backend. Lives inside the hooks->ctx pointer. 
*/ +typedef struct WmqbPosixCtx { + char dir[512]; + /* Per-instance flag so Free knows we own this allocation. */ + int owned; +} WmqbPosixCtx; + +/* Forward decl of all hook callbacks. */ +static int wmqb_posix_put(void* ctx, byte ns, const byte* key, word16 key_len, + const byte* blob, word32 blob_len); +static int wmqb_posix_get(void* ctx, byte ns, const byte* key, word16 key_len, + byte* out, word32* inout_len); +static int wmqb_posix_del(void* ctx, byte ns, const byte* key, word16 key_len); +static int wmqb_posix_iter(void* ctx, byte ns, MqttBrokerPersist_IterCb cb, + void* cb_ctx); +static int wmqb_posix_sync(void* ctx); + +/* hex encode key bytes into out (must be 2*key_len+1). Lowercase. */ +static void wmqb_hex_encode(char* out, const byte* in, word16 in_len) +{ + static const char hex[] = "0123456789abcdef"; + word16 i; + for (i = 0; i < in_len; i++) { + out[2 * i] = hex[(in[i] >> 4) & 0xF]; + out[2 * i + 1] = hex[in[i] & 0xF]; + } + out[2 * in_len] = '\0'; +} + +/* hex decode a NUL-terminated hex string into out. Returns the byte + * length on success, -1 on malformed input. */ +static int wmqb_hex_decode(const char* in, byte* out, word16 out_cap) +{ + word16 n; + word16 i; + size_t raw_len; + if (in == NULL) { + return -1; + } + /* Reject pathological inputs whose length would silently truncate + * on the word16 cast below and slip through as a shorter-but-valid + * decode. 
*/ + raw_len = XSTRLEN(in); + if (raw_len > 0xFFFFu) { + return -1; + } + n = (word16)raw_len; + if ((n & 1) != 0 || (n / 2) > out_cap) { + return -1; + } + for (i = 0; i < n / 2; i++) { + byte hi, lo; + char c = in[2 * i]; + if (c >= '0' && c <= '9') hi = (byte)(c - '0'); + else if (c >= 'a' && c <= 'f') hi = (byte)(10 + c - 'a'); + else if (c >= 'A' && c <= 'F') hi = (byte)(10 + c - 'A'); + else return -1; + c = in[2 * i + 1]; + if (c >= '0' && c <= '9') lo = (byte)(c - '0'); + else if (c >= 'a' && c <= 'f') lo = (byte)(10 + c - 'a'); + else if (c >= 'A' && c <= 'F') lo = (byte)(10 + c - 'A'); + else return -1; + out[i] = (byte)((hi << 4) | lo); + } + return n / 2; +} + +/* Build "/" path. Returns 0 on success, negative on overflow. */ +static int wmqb_ns_dir(const WmqbPosixCtx* c, byte ns, char* out, + size_t out_cap) +{ + int n = snprintf(out, out_cap, "%s/%u", c->dir, (unsigned)ns); + if (n <= 0 || (size_t)n >= out_cap) { + return MQTT_CODE_ERROR_OUT_OF_BUFFER; + } + return 0; +} + +/* Build "//.bin" path. */ +static int wmqb_rec_path(const WmqbPosixCtx* c, byte ns, const byte* key, + word16 key_len, char* out, size_t out_cap) +{ + char hex[2 * 256 + 1]; + int n; + if (key_len > 256) { + return MQTT_CODE_ERROR_OUT_OF_BUFFER; + } + wmqb_hex_encode(hex, key, key_len); + n = snprintf(out, out_cap, "%s/%u/%s.bin", c->dir, (unsigned)ns, hex); + if (n <= 0 || (size_t)n >= out_cap) { + return MQTT_CODE_ERROR_OUT_OF_BUFFER; + } + return 0; +} + +/* mkdir -p semantics for a single trailing component. Tolerates EEXIST. */ +static int wmqb_mkdir(const char* path) +{ + if (mkdir(path, 0700) == 0) { + return 0; + } + if (errno == EEXIST) { + return 0; + } + return MQTT_CODE_ERROR_SYSTEM; +} + +/* Ensure / exists. Idempotent. 
*/ +static int wmqb_ensure_ns_dir(const WmqbPosixCtx* c, byte ns) +{ + char path[576]; + int rc; + rc = wmqb_mkdir(c->dir); + if (rc != 0) { + return rc; + } + rc = wmqb_ns_dir(c, ns, path, sizeof(path)); + if (rc != 0) { + return rc; + } + return wmqb_mkdir(path); +} + +/* fsync a directory by open() + fsync() + close(). Best-effort. */ +static void wmqb_fsync_dir(const char* path) +{ + int fd = open(path, O_RDONLY); + if (fd >= 0) { + (void)fsync(fd); + (void)close(fd); + } +} + +/* kv_put: write to //.bin.tmp, fsync, rename, fsync dir. */ +static int wmqb_posix_put(void* ctx, byte ns, const byte* key, + word16 key_len, const byte* blob, word32 blob_len) +{ + WmqbPosixCtx* c = (WmqbPosixCtx*)ctx; + char final_path[640]; + char tmp_path[660]; + char ns_path[576]; + int fd; + int rc; + ssize_t w; + word32 written = 0; + + if (c == NULL || key == NULL || blob == NULL) { + return MQTT_CODE_ERROR_BAD_ARG; + } + rc = wmqb_ensure_ns_dir(c, ns); + if (rc != 0) { + return rc; + } + rc = wmqb_rec_path(c, ns, key, key_len, final_path, sizeof(final_path)); + if (rc != 0) { + return rc; + } + { + /* snprintf returns negative on encoding error, or a non-negative + * value that may be >= size on truncation. Treat both as + * failure - a truncated tmp_path would rename(2) to a + * different file than we intended. Pattern matches + * wmqb_rec_path. */ + int n = snprintf(tmp_path, sizeof(tmp_path), "%s.tmp", final_path); + if (n < 0 || (size_t)n >= sizeof(tmp_path)) { + return MQTT_CODE_ERROR_OUT_OF_BUFFER; + } + } + + fd = open(tmp_path, O_WRONLY | O_CREAT | O_TRUNC, 0600); + if (fd < 0) { + return MQTT_CODE_ERROR_SYSTEM; + } + { + /* Bounded EINTR retry. A signal storm should not cause an + * unbounded spin; bail with SYSTEM after 16 EINTRs so the + * caller can decide what to do. 
*/ + int eintr_count = 0; + while (written < blob_len) { + w = write(fd, blob + written, blob_len - written); + if (w < 0) { + if (errno == EINTR && eintr_count++ < 16) { + continue; + } + (void)close(fd); + (void)unlink(tmp_path); + return MQTT_CODE_ERROR_SYSTEM; + } + written += (word32)w; + } + } + if (fsync(fd) < 0) { + (void)close(fd); + (void)unlink(tmp_path); + return MQTT_CODE_ERROR_SYSTEM; + } + (void)close(fd); + if (rename(tmp_path, final_path) < 0) { + (void)unlink(tmp_path); + return MQTT_CODE_ERROR_SYSTEM; + } + /* fsync the namespace dir so rename is durable. */ + if (wmqb_ns_dir(c, ns, ns_path, sizeof(ns_path)) == 0) { + wmqb_fsync_dir(ns_path); + } + return 0; +} + +static int wmqb_posix_get(void* ctx, byte ns, const byte* key, + word16 key_len, byte* out, word32* inout_len) +{ + WmqbPosixCtx* c = (WmqbPosixCtx*)ctx; + char path[640]; + int fd; + int rc; + ssize_t r; + word32 cap; + word32 read_total = 0; + + if (c == NULL || key == NULL || inout_len == NULL) { + return MQTT_CODE_ERROR_BAD_ARG; + } + cap = *inout_len; + rc = wmqb_rec_path(c, ns, key, key_len, path, sizeof(path)); + if (rc != 0) { + return rc; + } + fd = open(path, O_RDONLY); + if (fd < 0) { + if (errno == ENOENT) { + *inout_len = 0; + return MQTT_CODE_ERROR_NOT_FOUND; + } + return MQTT_CODE_ERROR_SYSTEM; + } + { + int eintr_count = 0; + while (read_total < cap) { + r = read(fd, out + read_total, cap - read_total); + if (r == 0) { + break; + } + if (r < 0) { + if (errno == EINTR && eintr_count++ < 16) { + continue; + } + (void)close(fd); + return MQTT_CODE_ERROR_SYSTEM; + } + read_total += (word32)r; + } + } + (void)close(fd); + *inout_len = read_total; + return 0; +} + +static int wmqb_posix_del(void* ctx, byte ns, const byte* key, + word16 key_len) +{ + WmqbPosixCtx* c = (WmqbPosixCtx*)ctx; + char path[640]; + char ns_path[576]; + int rc; + + if (c == NULL || key == NULL) { + return MQTT_CODE_ERROR_BAD_ARG; + } + rc = wmqb_rec_path(c, ns, key, key_len, path, sizeof(path)); + if 
(rc != 0) { + return rc; + } + if (unlink(path) < 0) { + if (errno == ENOENT) { + return 0; + } + return MQTT_CODE_ERROR_SYSTEM; + } + if (wmqb_ns_dir(c, ns, ns_path, sizeof(ns_path)) == 0) { + wmqb_fsync_dir(ns_path); + } + return 0; +} + +static int wmqb_posix_iter(void* ctx, byte ns, MqttBrokerPersist_IterCb cb, + void* cb_ctx) +{ + WmqbPosixCtx* c = (WmqbPosixCtx*)ctx; + char ns_path[576]; + DIR* d; + struct dirent* ent; + int rc; + + if (c == NULL || cb == NULL) { + return MQTT_CODE_ERROR_BAD_ARG; + } + rc = wmqb_ns_dir(c, ns, ns_path, sizeof(ns_path)); + if (rc != 0) { + return rc; + } + d = opendir(ns_path); + if (d == NULL) { + if (errno == ENOENT) { + return 0; + } + return MQTT_CODE_ERROR_SYSTEM; + } + while ((ent = readdir(d)) != NULL) { + char rec_path[640]; + char key_hex[2 * 256 + 1]; + byte key_buf[256]; + byte* blob; + word32 blob_cap; + struct stat st; + int fd; + ssize_t r; + word32 read_total; + int kn; + size_t nlen; + const char* dot; + int stop; + + if (ent->d_name[0] == '.') { + continue; + } + nlen = strlen(ent->d_name); + if (nlen < 5) { + continue; + } + dot = ent->d_name + nlen - 4; + if (strcmp(dot, ".bin") != 0) { + continue; + } + if ((nlen - 4) >= sizeof(key_hex)) { + continue; + } + XMEMCPY(key_hex, ent->d_name, nlen - 4); + key_hex[nlen - 4] = '\0'; + kn = wmqb_hex_decode(key_hex, key_buf, sizeof(key_buf)); + if (kn < 0) { + continue; + } + { + int n = snprintf(rec_path, sizeof(rec_path), "%s/%s", ns_path, + ent->d_name); + if (n < 0 || (size_t)n >= sizeof(rec_path)) { + continue; + } + } + if (stat(rec_path, &st) < 0) { + continue; + } + if (st.st_size <= 0 || (word64)st.st_size > 16 * 1024 * 1024) { + /* Sanity cap: refuse to load records larger than 16 MiB. 
*/ + continue; + } + blob_cap = (word32)st.st_size; + blob = (byte*)WOLFMQTT_MALLOC(blob_cap); + if (blob == NULL) { + (void)closedir(d); + return MQTT_CODE_ERROR_MEMORY; + } + fd = open(rec_path, O_RDONLY); + if (fd < 0) { + WOLFMQTT_FREE(blob); + continue; + } + read_total = 0; + { + int eintr_count = 0; + while (read_total < blob_cap) { + r = read(fd, blob + read_total, blob_cap - read_total); + if (r == 0) { + break; + } + if (r < 0) { + if (errno == EINTR && eintr_count++ < 16) { + continue; + } + break; + } + read_total += (word32)r; + } + } + (void)close(fd); + if (read_total != blob_cap) { + WOLFMQTT_FREE(blob); + continue; + } + stop = cb(key_buf, (word16)kn, blob, blob_cap, cb_ctx); + WOLFMQTT_FREE(blob); + if (stop != 0) { + break; + } + } + (void)closedir(d); + return 0; +} + +static int wmqb_posix_sync(void* ctx) +{ + WmqbPosixCtx* c = (WmqbPosixCtx*)ctx; + /* The per-op fsync in put/del already covered the data + the + * namespace dir. A top-level fsync of the root dir here ensures + * any namespace-dir creates are durable too. */ + if (c == NULL) { + return MQTT_CODE_ERROR_BAD_ARG; + } + wmqb_fsync_dir(c->dir); + return 0; +} + +int MqttBrokerNet_PersistPosix_Init(MqttBrokerPersistHooks* hooks, + const char* dir) +{ + WmqbPosixCtx* c; + const char* use_dir = (dir != NULL) ? 
dir : BROKER_PERSIST_DIR_DEFAULT; + size_t dlen; + + if (hooks == NULL) { + return MQTT_CODE_ERROR_BAD_ARG; + } + dlen = strlen(use_dir); + if (dlen == 0 || dlen >= sizeof(((WmqbPosixCtx*)0)->dir)) { + return MQTT_CODE_ERROR_OUT_OF_BUFFER; + } + c = (WmqbPosixCtx*)WOLFMQTT_MALLOC(sizeof(*c)); + if (c == NULL) { + return MQTT_CODE_ERROR_MEMORY; + } + XMEMSET(c, 0, sizeof(*c)); + XMEMCPY(c->dir, use_dir, dlen); + c->dir[dlen] = '\0'; + c->owned = 1; + + XMEMSET(hooks, 0, sizeof(*hooks)); + hooks->kv_put = wmqb_posix_put; + hooks->kv_get = wmqb_posix_get; + hooks->kv_del = wmqb_posix_del; + hooks->kv_iter = wmqb_posix_iter; + hooks->sync = wmqb_posix_sync; + hooks->ctx = c; + + /* Create root dir up front so first put doesn't race. Tolerates + * EEXIST inside wmqb_mkdir. Non-fatal at init time (the first put + * will retry and surface any persistent error), but log a warning + * so an operator hitting EACCES on the default /var/lib/wolfmqtt + * path (broker run as a non-privileged user without -D) sees the + * failure here instead of being puzzled when nothing persists. */ + if (wmqb_mkdir(c->dir) != 0) { + fprintf(stderr, + "wolfmqtt: persist root mkdir failed dir=\"%s\" errno=%d (%s) " + "- persistence will fail unless the path is writable\n", + c->dir, errno, strerror(errno)); + } + return 0; +} + +void MqttBrokerNet_PersistPosix_Free(MqttBrokerPersistHooks* hooks) +{ + WmqbPosixCtx* c; + if (hooks == NULL) { + return; + } + c = (WmqbPosixCtx*)hooks->ctx; + if (c != NULL && c->owned) { + WOLFMQTT_FREE(c); + } + XMEMSET(hooks, 0, sizeof(*hooks)); +} + +#endif /* WOLFMQTT_BROKER_PERSIST */ diff --git a/wolfmqtt/mqtt_broker.h b/wolfmqtt/mqtt_broker.h index 4cc5dc2e..1ca0dee9 100644 --- a/wolfmqtt/mqtt_broker.h +++ b/wolfmqtt/mqtt_broker.h @@ -128,6 +128,88 @@ #define BROKER_MAX_INBOUND_QOS2 16 #endif +/* Per-subscriber outbound delivery shaping. 
+ * + * BROKER_MAX_INFLIGHT_PER_SUB bounds the number of outbound QoS 1/2 PUBLISHes + * the broker may have in flight to a single subscriber at once. This gives + * users a Mosquitto "max_inflight_messages" equivalent that works on both + * v3.1.1 and v5; for v5 clients it is further clamped by the client's + * Receive Maximum property (MQTT v5 sec 3.1.2.11.3). + * + * The default is derived from BROKER_TX_BUF_SZ so that the per-subscriber + * outbound queue is "roughly the size that the tx buffer could plausibly + * pipeline" without picking an arbitrary number. Override with + * -DBROKER_MAX_INFLIGHT_PER_SUB=N to set a hard cap (1 = strict serial), + * -DBROKER_DEFAULT_AVG_MSG_SZ=N to retune the derivation. + * + * Only used in dynamic-memory mode; STATIC_MEMORY mode keeps the legacy + * synchronous fan-out path. */ +#ifndef BROKER_DEFAULT_AVG_MSG_SZ + #define BROKER_DEFAULT_AVG_MSG_SZ 256 +#endif +#ifndef BROKER_MIN_INFLIGHT_PER_SUB + #define BROKER_MIN_INFLIGHT_PER_SUB 8 +#endif +#ifndef BROKER_MAX_INFLIGHT_PER_SUB + #define BROKER_MAX_INFLIGHT_PER_SUB \ + (((BROKER_TX_BUF_SZ / BROKER_DEFAULT_AVG_MSG_SZ) < \ + BROKER_MIN_INFLIGHT_PER_SUB) ? \ + BROKER_MIN_INFLIGHT_PER_SUB : \ + (BROKER_TX_BUF_SZ / BROKER_DEFAULT_AVG_MSG_SZ)) +#endif + +/* Persistent storage caps (only meaningful with WOLFMQTT_BROKER_PERSIST). + * + * BROKER_MAX_PERSIST_SESSIONS bounds the number of disconnected + * persistent sessions kept across broker restart. + * BROKER_MAX_OFFLINE_MSGS_PER_SUB bounds the per-session offline queue + * depth; overflow drops the oldest message (FIFO eviction). */ +#ifndef BROKER_MAX_PERSIST_SESSIONS + #define BROKER_MAX_PERSIST_SESSIONS 64 +#endif +#ifndef BROKER_MAX_OFFLINE_MSGS_PER_SUB + #define BROKER_MAX_OFFLINE_MSGS_PER_SUB 32 +#endif + +/* Schema version stamped on every persisted record. 
Bump when the + * encoding of any namespace changes incompatibly; a startup with stored + * records carrying a different version logs a warning, wipes all + * persisted state, and starts clean (per plan: wipe-and-restart). */ +#ifndef WOLFMQTT_BROKER_PERSIST_SCHEMA_VER + /* Bumped from 1 -> 2 when the header layout split a dedicated + * wrap_mode byte out of the schema-version field. Any existing + * dev directory written by an older build mismatches and is + * wiped via the schema-mismatch path on first restart. */ + #define WOLFMQTT_BROKER_PERSIST_SCHEMA_VER 2 +#endif + +/* Header wrap_mode byte values (record body framing on disk). */ +#define WOLFMQTT_BROKER_PERSIST_WRAP_PLAIN 0 +#define WOLFMQTT_BROKER_PERSIST_WRAP_AES_GCM 1 + +/* Magic bytes prefixing every persisted record so a stray file in the + * backend directory cannot be misinterpreted as broker state. */ +#define WOLFMQTT_BROKER_PERSIST_MAGIC0 'W' +#define WOLFMQTT_BROKER_PERSIST_MAGIC1 'M' +#define WOLFMQTT_BROKER_PERSIST_MAGIC2 'Q' +#define WOLFMQTT_BROKER_PERSIST_MAGIC3 'B' + +/* Default storage directory for the POSIX backend. Application can pass + * a different path at MqttBrokerNet_PersistPosix_Init time. */ +#ifndef BROKER_PERSIST_DIR_DEFAULT + #define BROKER_PERSIST_DIR_DEFAULT "/var/lib/wolfmqtt" +#endif + +/* Persistence namespaces. One per logical record type. The backend + * is free to map each namespace to a separate directory, table, + * keyspace, or sub-region; the broker just passes the namespace byte + * verbatim. Values are stable across schema versions. 
*/ +#define BROKER_PERSIST_NS_META 1 /* schema version, broker meta */ +#define BROKER_PERSIST_NS_SESSION 2 /* per-client_id session record */ +#define BROKER_PERSIST_NS_SUBS 3 /* per-client_id subscription list */ +#define BROKER_PERSIST_NS_RETAINED 4 /* per-topic retained message */ +#define BROKER_PERSIST_NS_OUTQ 5 /* per-client_id outbound queue + inflight */ + /* -------------------------------------------------------------------------- */ /* Feature toggles (opt-out: define WOLFMQTT_BROKER_NO_xxx to disable) */ /* -------------------------------------------------------------------------- */ @@ -177,6 +259,69 @@ typedef struct MqttBrokerNet { void* ctx; } MqttBrokerNet; +/* -------------------------------------------------------------------------- */ +/* Persistent storage hooks */ +/* -------------------------------------------------------------------------- */ +#ifdef WOLFMQTT_BROKER_PERSIST +/* The persistence layer is intentionally hook-based so the broker can run + * on top of POSIX files, embedded flash, an external KV store, or an + * in-RAM stub used by tests. Each hook returns 0 on success or a negative + * error code (broker logs and skips persist for that record - the + * in-memory state is still authoritative). + * + * Both a key/value API and a streaming API are provided. The broker will + * use whichever family the registered hook implements; any individual + * hook pointer may be NULL when not supported. At minimum kv_put / kv_get + * / kv_iter must be installed for sessions / subs / retained / outq to + * round-trip; the streaming API is offered for backends that prefer an + * append-only log (e.g., raw NOR flash). */ + +/* Iterator callback supplied by the broker to kv_iter. Return 0 to + * continue, non-zero to stop iteration early. */ +typedef int (*MqttBrokerPersist_IterCb)(const byte* key, word16 key_len, + const byte* blob, word32 blob_len, void* cb_ctx); + +/* Stream open mode. 
*/ +#define BROKER_PERSIST_STREAM_READ 1 +#define BROKER_PERSIST_STREAM_WRITE 2 +#define BROKER_PERSIST_STREAM_APPEND 3 + +typedef struct MqttBrokerPersistHooks { + /* Key/value blob API. key bytes are opaque to the backend; key_len is + * always <= 256 in current use (a client_id or topic). */ + int (*kv_put)(void* ctx, byte ns, const byte* key, word16 key_len, + const byte* blob, word32 blob_len); + int (*kv_get)(void* ctx, byte ns, const byte* key, word16 key_len, + byte* out, word32* inout_len); + int (*kv_del)(void* ctx, byte ns, const byte* key, word16 key_len); + int (*kv_iter)(void* ctx, byte ns, MqttBrokerPersist_IterCb cb, + void* cb_ctx); + + /* Streaming API. handle is opaque; the broker passes it through. */ + int (*stream_open)(void* ctx, byte ns, const byte* key, word16 key_len, + int mode, void** handle); + int (*stream_read)(void* ctx, void* handle, byte* buf, word32 len, + word32* out_len); + int (*stream_write)(void* ctx, void* handle, const byte* buf, + word32 len); + int (*stream_close)(void* ctx, void* handle); + + /* Force all pending writes to durable storage. Called after every + * shadow-write commit (the "fsync after each commit" policy). */ + int (*sync)(void* ctx); + + /* Encryption-at-rest key derivation. Called once at broker init when + * WOLFMQTT_BROKER_PERSIST_ENCRYPT is enabled. Must fill 32 bytes + * (AES-256) into out_key. */ +#ifdef WOLFMQTT_BROKER_PERSIST_ENCRYPT + int (*derive_key)(void* ctx, byte* out_key, word32 key_len); +#endif /* WOLFMQTT_BROKER_PERSIST_ENCRYPT */ + + /* Backend context pointer passed back into every callback.
*/ + void* ctx; +} MqttBrokerPersistHooks; +#endif /* WOLFMQTT_BROKER_PERSIST */ + /* -------------------------------------------------------------------------- */ /* WebSocket per-client context */ /* -------------------------------------------------------------------------- */ @@ -218,6 +363,70 @@ typedef struct BrokerInboundQos2 { #endif #endif /* WOLFMQTT_MAX_QOS >= 2 */ +/* -------------------------------------------------------------------------- */ +/* Per-subscriber outbound publish queue (dynamic memory mode only). + * + * Each entry owns the topic and payload bytes via heap copy so the queue is + * independent of the publisher's rx_buf lifetime. The state field tracks + * the QoS handshake position for that one delivery to that one subscriber: + * + * BROKER_OUTQ_QUEUED Not yet sent on the wire. + * BROKER_OUTQ_PUBLISH_SENT QoS 1: awaiting PUBACK. QoS 2: awaiting PUBREC. + * BROKER_OUTQ_PUBREL_SENT QoS 2 only: PUBREC received, PUBREL sent, + * awaiting PUBCOMP. + * + * QoS 0 entries are deleted as soon as the PUBLISH is written; they never + * leave the QUEUED state and never increment the inflight counter. */ +#ifndef WOLFMQTT_STATIC_MEMORY +enum BrokerOutPubState { + BROKER_OUTQ_QUEUED = 0, + BROKER_OUTQ_PUBLISH_SENT = 1, + BROKER_OUTQ_PUBREL_SENT = 2 +}; + +typedef struct BrokerOutPub { + char* topic; /* heap-owned, NUL-terminated */ + byte* payload; /* heap-owned, may be NULL when payload_len == 0 */ + word32 payload_len; + MqttQoS qos; + word16 packet_id; /* 0 for QoS 0 */ + byte retain; + byte state; /* BROKER_OUTQ_* (enum BrokerOutPubState) */ + /* On session resumption, BrokerOrphan_Reclaim resets any entry + * that was previously PUBLISH_SENT back to QUEUED and sets + * retransmit_dup=1. The drain encodes the PUBLISH with + * MqttPublish.duplicate=1 on first re-send, as required by + * MQTT-3.3.1-1 (re-delivery per MQTT-4.4.0-1), then clears the flag.
*/ + byte retransmit_dup; /* 0 or 1 */ + WOLFMQTT_BROKER_TIME_T enq_time; + word32 expiry_sec; /* v5 Message Expiry Interval, 0 = no expiry */ + byte protocol_level; /* echoed back to subscriber on send */ + struct BrokerOutPub* next; /* next entry in the outbound queue */ +} BrokerOutPub; + +/* -------------------------------------------------------------------------- */ +/* Orphan session (dynamic memory only). */ +/* */ +/* Holds the persistent-session state of a disconnected client (Clean */ +/* Start=0): its outbound message queue, in-flight QoS 1/2 receipts, and */ +/* enough identity to be reclaimed on reconnect. Smaller than a full */ +/* BrokerClient because no socket / tx_buf / rx_buf / TLS state is needed */ +/* while disconnected. Subs that belonged to the original BrokerClient */ +/* keep sub->client=NULL while orphaned; reconnect rebinds them. */ +/* -------------------------------------------------------------------------- */ +typedef struct BrokerOrphanSession { + char* client_id; /* heap-owned, NUL-terminated */ + byte protocol_level; + word32 session_expiry_sec; /* v5 Session Expiry; 0xFFFFFFFF=never */ + WOLFMQTT_BROKER_TIME_T orphan_since; + BrokerOutPub* out_q_head; + BrokerOutPub* out_q_tail; + int out_q_count; + int out_q_inflight; + struct BrokerOrphanSession* next; /* next in broker->orphan_sessions */ +} BrokerOrphanSession; +#endif /* !WOLFMQTT_STATIC_MEMORY */ + /* -------------------------------------------------------------------------- */ /* Broker client tracking */ /* -------------------------------------------------------------------------- */ @@ -293,6 +502,29 @@ typedef struct BrokerClient { int qos2_pending_count; #endif #endif /* WOLFMQTT_MAX_QOS >= 2 */ +#ifndef WOLFMQTT_STATIC_MEMORY + /* Per-subscriber outbound publish queue. FIFO from head to tail; + * drain pulls from head. out_q_inflight is the number of entries in + * state PUBLISH_SENT or PUBREL_SENT (QoS 1/2 awaiting an ack); + * BROKER_MAX_INFLIGHT_PER_SUB and client_receive_max together bound + * how many of those may exist at once.
out_q_count is the total number of entries, + * including not-yet-sent QUEUED ones. Used for fan-out at every + * QoS level (QoS 0 forwards transit the queue too). */ + BrokerOutPub* out_q_head; + BrokerOutPub* out_q_tail; + int out_q_count; + int out_q_inflight; + /* v5 Receive Maximum advertised by this client in CONNECT, or 65535 + * (per MQTT v5 sec 3.1.2.11.3) when the client did not include the + * property. For v3.1.1 clients this is left at 65535 - the cap + * comes from BROKER_MAX_INFLIGHT_PER_SUB alone. */ + word16 client_receive_max; + /* v5 Session Expiry Interval (seconds). Captured from CONNECT + * properties for clean_session=0 sessions so the disconnect path + * can stamp it into the orphan slot. 0xFFFFFFFF means "never + * expire", which is the v3.1.1 persistent-session default. */ + word32 session_expiry_sec; +#endif /* !WOLFMQTT_STATIC_MEMORY */ } BrokerClient; /* -------------------------------------------------------------------------- */ @@ -410,6 +642,30 @@ typedef struct MqttBroker { const char *ws_tls_key; const char *ws_tls_ca; #endif +#ifdef WOLFMQTT_BROKER_PERSIST + /* Pointer (not embedded struct) so the broker stays small when no + * application installs hooks. NULL means "in-memory only", which is + * the same behavior as a build without WOLFMQTT_BROKER_PERSIST. */ + const MqttBrokerPersistHooks* persist; + #ifdef WOLFMQTT_BROKER_PERSIST_ENCRYPT + /* AES-256 key cache for at-rest encryption. Populated by the first + * encrypt/decrypt call via derive_key(); zeroed (ForceZero) on + * MqttBroker_Free. Per-broker so multiple broker instances in one + * process don't share key material. */ + byte persist_key_cache[32]; + byte persist_key_loaded; /* 0 or 1 */ + #endif /* WOLFMQTT_BROKER_PERSIST_ENCRYPT */ +#endif /* WOLFMQTT_BROKER_PERSIST */ +#ifndef WOLFMQTT_STATIC_MEMORY + /* Linked list of disconnected persistent sessions.
Each entry holds + * its own outbound queue and identity so messages published while the + * owning client is offline are retained until reconnect (or + * BROKER_MAX_PERSIST_SESSIONS forces drop-oldest eviction). Subs + * pointing at orphaned sessions keep sub->client=NULL; fan-out + * branches on that to look up the orphan by client_id. */ + BrokerOrphanSession* orphan_sessions; + int orphan_session_count; +#endif /* !WOLFMQTT_STATIC_MEMORY */ } MqttBroker; /* -------------------------------------------------------------------------- */ @@ -448,6 +704,79 @@ WOLFMQTT_API int MqttBrokerNet_wolfIP_Init(MqttBrokerNet* net, WOLFMQTT_API int MqttBrokerNet_Init(MqttBrokerNet* net); #endif +#ifdef WOLFMQTT_BROKER_PERSIST +/* Install persistence hooks on the broker. Must be called before + * MqttBroker_Start. Passing NULL clears any previously installed hooks + * (reverts to in-memory-only behavior). The MqttBrokerPersistHooks + * struct must outlive the broker. */ +WOLFMQTT_API int MqttBroker_SetPersistHooks(MqttBroker* broker, + const MqttBrokerPersistHooks* hooks); + +/* Initialize the default POSIX file-based persistence backend. Stores + * each record as a file under dir (defaults to BROKER_PERSIST_DIR_DEFAULT + * when dir is NULL). Calls fsync on every commit. Caller is responsible for + * keeping the hooks struct alive while the broker runs. */ +WOLFMQTT_API int MqttBrokerNet_PersistPosix_Init( + MqttBrokerPersistHooks* hooks, const char* dir); + +/* Tear down the POSIX backend - releases the directory descriptor and + * any per-handle state. Does not delete persisted files. */ +WOLFMQTT_API void MqttBrokerNet_PersistPosix_Free( + MqttBrokerPersistHooks* hooks); + +/* -------------------------------------------------------------------------- */ +/* Internal shadow-write helpers (linked from mqtt_broker.c into the + * mqtt_broker binary). All are no-ops when broker->persist is NULL so + * call sites do not need to guard. WOLFMQTT_LOCAL keeps them out of + * the public shared-library ABI.
The encoders use a heap-allocated + * scratch buffer sized to the record - too large to live on the + * select-loop stack and bursty enough that a per-call alloc is the + * least surprising approach. Backends can themselves choose how to + * persist or fsync. Forward-compat is via WOLFMQTT_BROKER_PERSIST_SCHEMA_VER. */ +struct BrokerClient; +struct BrokerSub; +struct BrokerRetainedMsg; +struct BrokerOutPub; + +WOLFMQTT_LOCAL int BrokerPersist_PutSession(MqttBroker* broker, + const struct BrokerClient* bc); +WOLFMQTT_LOCAL int BrokerPersist_DelSession(MqttBroker* broker, + const char* client_id); + +WOLFMQTT_LOCAL int BrokerPersist_PutSubs(MqttBroker* broker, + const char* client_id); +WOLFMQTT_LOCAL int BrokerPersist_DelSubs(MqttBroker* broker, + const char* client_id); + +WOLFMQTT_LOCAL int BrokerPersist_PutRetained(MqttBroker* broker, + const struct BrokerRetainedMsg* rm); +WOLFMQTT_LOCAL int BrokerPersist_DelRetained(MqttBroker* broker, + const char* topic); + +WOLFMQTT_LOCAL int BrokerPersist_PutOutPub(MqttBroker* broker, + const char* client_id, const struct BrokerOutPub* e); +WOLFMQTT_LOCAL int BrokerPersist_DelOutPub(MqttBroker* broker, + const char* client_id, word16 packet_id); +WOLFMQTT_LOCAL int BrokerPersist_DelOutQueue(MqttBroker* broker, + const char* client_id); + +/* Startup-time restore: iterate persisted records and rebuild the + * in-memory tables. Called from MqttBroker_Init when hooks are + * installed. Wipes everything and re-stamps the META namespace if + * the persisted schema version doesn't match. */ +WOLFMQTT_LOCAL int BrokerPersist_Restore(MqttBroker* broker); +#endif /* WOLFMQTT_BROKER_PERSIST */ + +#ifndef WOLFMQTT_STATIC_MEMORY +/* Full orphan teardown: delete persisted records (no-op without + * WOLFMQTT_BROKER_PERSIST), drop any orphan-bound subs + * (sub->client == NULL with matching client_id) from broker->subs, + * then unlink and free the orphan slot.
Used by both eviction (cap reached) + * and restore-time expiry sweep so the two paths can't drift. */ +WOLFMQTT_LOCAL void BrokerOrphan_DropFull(MqttBroker* broker, + BrokerOrphanSession* o); +#endif /* !WOLFMQTT_STATIC_MEMORY */ + /* CLI wrapper interface */ WOLFMQTT_API int wolfmqtt_broker(int argc, char** argv);