diff --git a/examples/blob_raw.ql b/examples/blob_raw.ql
new file mode 100644
index 000000000..23390130a
--- /dev/null
+++ b/examples/blob_raw.ql
@@ -0,0 +1,4 @@
+[CODE]
+load_address = 0x10000000
+entry_point = 0x10000008
+ram_size = 0xa00000
\ No newline at end of file
diff --git a/examples/hello_arm_blob_raw.py b/examples/hello_arm_blob_raw.py
new file mode 100644
index 000000000..4c257166e
--- /dev/null
+++ b/examples/hello_arm_blob_raw.py
@@ -0,0 +1,101 @@
+##############################################################################
+# This example is meant to demonstrate the modifications necessary 
+# to enable code coverage when emulating small code snippets or bare-metal 
+# code.
+##############################################################################
+from qiling import Qiling
+from qiling.const import QL_ARCH, QL_OS, QL_VERBOSE
+from qiling.extensions.coverage import utils as cov_utils
+from qiling.loader.loader import Image
+import os
+
+BASE_ADDRESS = 0x10000000
+CHECKSUM_FUNC_ADDR = BASE_ADDRESS + 0x8
+END_ADDRESS = 0x100000ba
+DATA_ADDR = 0xa0000000 # Arbitrary address for data
+STACK_ADDR = 0xb0000000 # Arbitrary address for stack
+
+def checksum_function(input_data_buffer: bytes):
+    """Reference model of the emulated checksum routine.
+
+    The path taken depends on the input, which makes coverage differences
+    easy to observe. The result is truncated to a single byte.
+    """
+    payload = input_data_buffer
+    if len(payload) >= 1 and payload[0] == 0xDE:  # MAGIC_VALUE_1
+        # plain sum of at most the first four bytes, plus a fixed offset
+        total = sum(payload[:4]) + 0x10
+    elif len(payload) >= 2 and payload[1] == 0xAD:  # MAGIC_VALUE_2
+        # xor of every byte, plus a fixed offset
+        total = 0
+        for value in payload:
+            total ^= value
+        total += 0x20
+    else:
+        total = sum(payload)  # default: plain sum of every byte
+    return total & 0xFF
+
+def unmapped_handler(ql: Qiling, type: int, addr: int, size: int, value: int) -> None:
+    """Memory hook: report an access to unmapped memory and the PC it came from."""
+    print(f"Unmapped Memory R/W, trying to access {size:d} bytes at {addr:#010x} from {ql.arch.regs.pc:#010x}")
+
+def emulate_checksum_function(input_data_buffer: bytes) -> None:
+    """Emulate the blob's checksum routine on `input_data_buffer`.
+
+    Collects drcov coverage into 'output.cov' while running and prints the
+    emulated checksum next to the Python reference value.
+    """
+    print(f"\n--- Testing with input: {input_data_buffer.hex()} ---")
+
+    test_file = "rootfs/blob/example_raw.bin"
+    with open(test_file, "rb") as f:
+        raw_code: bytes = f.read()
+
+    ql: Qiling = Qiling(
+        code=raw_code,
+        archtype=QL_ARCH.ARM,
+        ostype=QL_OS.BLOB,
+        profile="blob_raw.ql",
+        verbose=QL_VERBOSE.DEBUG,
+        thumb=True
+    )
+
+    # monkeypatch - correct the loader image name used for coverage collection:
+    # drop every 'blob_code' image created by the blob loader and register one
+    # named after the input file, because some code coverage visualization
+    # tools require the module name to match that of the input file
+    ql.loader.images = [img for img in ql.loader.images if img.path != 'blob_code']
+    ql.loader.images.append(Image(ql.loader.load_address, ql.loader.load_address + ql.os.code_ram_size, os.path.basename(test_file)))
+
+    # Map memory for the stack and the data, then write the input data
+    input_data_len: int = len(input_data_buffer)
+    ql.mem.map(STACK_ADDR, 0x2000)
+    ql.mem.map(DATA_ADDR, ql.mem.align_up(input_data_len + 0x100))  # Map enough space for data
+    ql.mem.write(DATA_ADDR, input_data_buffer)
+
+    # Set up the stack pointer and the argument registers (r0 = data, r1 = length)
+    ql.arch.regs.sp = STACK_ADDR + 0x2000 - 4
+    ql.arch.regs.r0 = DATA_ADDR
+    ql.arch.regs.r1 = input_data_len
+
+    # Start at the function's entry point; LR gets a dummy return address
+    ql.arch.regs.pc = CHECKSUM_FUNC_ADDR
+    ql.arch.regs.lr = 0xbebebebe
+
+    ql.hook_mem_unmapped(unmapped_handler)
+    # to attach a debugger, set: ql.debugger = "gdb:127.0.0.1:9999"
+
+    print(f"Starting emulation at PC: {hex(ql.arch.regs.pc)}")
+    try:
+        with cov_utils.collect_coverage(ql, 'drcov', 'output.cov'):
+            ql.run(begin=CHECKSUM_FUNC_ADDR, end=END_ADDRESS)
+    except Exception as e:
+        print(f"Emulation error: {e}")
+        return  # r0 holds no meaningful checksum after a failed run
+
+    print(f"Emulated checksum: {hex(ql.arch.regs.r0)}")
+    print(f"Expected checksum: {hex(checksum_function(input_data_buffer))}")
+
+if __name__ == "__main__":
+    data = b"\x01\x02\x03\x04\x05"  # Example input data
+    emulate_checksum_function(data)
\ No newline at end of file
diff --git a/examples/rootfs b/examples/rootfs
index f71f45fe1..120fb6d37 160000
--- a/examples/rootfs
+++ b/examples/rootfs
@@ -1 +1 @@
-Subproject commit f71f45fe1a39d58d8b8cae717f55cebeb37f63c7
+Subproject commit 120fb6d37700a2d4c0e35ced599aaee7a8f98723
diff --git a/examples/src/blob/Makefile b/examples/src/blob/Makefile
new file mode 100644
index 000000000..74966f268
--- /dev/null
+++ b/examples/src/blob/Makefile
@@ -0,0 +1,52 @@
+# Makefile for Bare-Metal ARM Checksum Calculator
+
+# --- Toolchain Definitions ---
+TOOLCHAIN_PREFIX = arm-none-eabi
+
+# Compiler, Linker, and Objcopy executables
+CC = $(TOOLCHAIN_PREFIX)-gcc
+LD = $(TOOLCHAIN_PREFIX)-gcc
+OBJCOPY = $(TOOLCHAIN_PREFIX)-objcopy
+
+# --- Source and Output Files (OBJS is SRCS with every .c replaced by .o) ---
+SRCS = example_raw.c
+OBJS = $(SRCS:.c=.o)
+ELF = example_raw.elf
+BIN = example_raw.bin
+
+# --- Linker Script ---
+LDSCRIPT = linker.ld
+
+# --- Compiler Flags ---
+CFLAGS = -c -O0 -mcpu=cortex-a7 -mthumb -ffreestanding -nostdlib
+
+# --- Linker Flags ---
+LDFLAGS = -T $(LDSCRIPT) -nostdlib
+
+# --- Objcopy Flags ---
+OBJCOPYFLAGS = -O binary
+
+# --- Default Target ---
+.PHONY: all clean
+
+all: $(BIN)
+
+# Rule to build the raw binary (.bin) from the ELF file
+$(BIN): $(ELF)
+	$(OBJCOPY) $(OBJCOPYFLAGS) $< $@
+	@echo "Successfully created $(BIN)"
+
+# Rule to link the object file into an ELF executable
+$(ELF): $(OBJS) $(LDSCRIPT)
+	$(LD) $(LDFLAGS) $(OBJS) -o $@
+	@echo "Successfully linked $(ELF)"
+
+# Rule to compile the C source file into an object file
+%.o: %.c
+	$(CC) $(CFLAGS) $< -o $@
+	@echo "Successfully compiled $<"
+
+# --- Clean Rule ---
+clean:
+	rm -f $(OBJS) $(ELF) $(BIN)
+	@echo "Cleaned build artifacts."
diff --git a/examples/src/blob/example_raw.c b/examples/src/blob/example_raw.c
new file mode 100644
index 000000000..13cd70779
--- /dev/null
+++ b/examples/src/blob/example_raw.c
@@ -0,0 +1,56 @@
+// example checksum algorithm to demonstrate raw binary code coverage in qiling
+// example_raw.c
+
+// Define some magic values
+#define MAGIC_VALUE_1 0xDE
+#define MAGIC_VALUE_2 0xAD
+
+// Computes a one-byte checksum over `length` bytes starting at `data`.
+// Which of the three algorithms below runs depends on the input, so different
+// inputs take different branches. Returns the checksum as an unsigned char.
+unsigned char calculate_checksum(const unsigned char *data, unsigned int length) {
+    unsigned char checksum = 0;
+
+    // Branch 1: Check for MAGIC_VALUE_1 at the start
+    if (length >= 1 && data[0] == MAGIC_VALUE_1) {
+        // If first byte is MAGIC_VALUE_1, do a simple sum of first 4 bytes
+        // (or up to length if less than 4)
+        for (unsigned int i = 0; i < length && i < 4; i++) {
+            checksum += data[i];
+        }
+        // Add a fixed offset to make this path distinct
+        checksum += 0x10;
+    }
+    // Branch 2: Check for MAGIC_VALUE_2 at the second byte
+    else if (length >= 2 && data[1] == MAGIC_VALUE_2) {
+        // If second byte is MAGIC_VALUE_2, do an XOR of all bytes
+        for (unsigned int i = 0; i < length; i++) {
+            checksum ^= data[i];
+        }
+        // Add a fixed offset to make this path distinct
+        checksum += 0x20;
+    }
+    // Default Branch: Standard byte sum checksum
+    else {
+        for (unsigned int i = 0; i < length; i++) {
+            checksum += data[i];
+        }
+    }
+
+    return checksum;
+}
+
+// Minimal entry point for bare-metal; the linker needs it as an entry point.
+// It is placed in the .text.startup section and is not called during Qiling
+// emulation: the emulation script starts at calculate_checksum instead.
+__attribute__((section(".text.startup")))
+void _start(void) {
+    // In a real bare-metal application, this would initialize hardware,
+    // set up stacks, etc. For this example, it's just a placeholder.
+    // We'll call calculate_checksum directly from our Qiling script.
+
+    while (1) {
+        // Do nothing, or perhaps put the CPU to sleep
+        asm volatile ("wfi"); // Wait For Interrupt (ARM instruction)
+    }
+}
\ No newline at end of file
diff --git a/examples/src/blob/linker.ld b/examples/src/blob/linker.ld
new file mode 100644
index 000000000..ae31f2fa3
--- /dev/null
+++ b/examples/src/blob/linker.ld
@@ -0,0 +1,39 @@
+/* linker.ld */
+
+ENTRY(_start) /* Define the entry point of our program */
+
+/* Define memory regions - simple RAM region for this example */
+MEMORY
+{
+    ram (rwx) : ORIGIN = 0x10000000, LENGTH = 64K /* 64KB of RAM for our program */
+}
+
+SECTIONS
+{
+    /* Start placing sections at 0x10000000, the load address the emulation
+     * profile (blob_raw.ql) also uses. */
+    . = 0x10000000;
+
+    .text : {
+        KEEP(*(.text.startup)) /* Keep the _start function */
+        *(.text)             /* All other code */
+        *(.text.*)
+        *(.rodata)           /* Read-only data */
+        *(.rodata.*)
+        . = ALIGN(4);
+    } > ram /* Place .text section in the 'ram' region */
+
+    .data : {
+        . = ALIGN(4);
+        *(.data)             /* Initialized data */
+        *(.data.*)
+        . = ALIGN(4);
+    } > ram
+
+    .bss : {
+        . = ALIGN(4);
+        *(.bss)
+        *(.bss.*)
+        . = ALIGN(4);
+    } > ram
+}
\ No newline at end of file
diff --git a/examples/uboot_bin.ql b/examples/uboot_bin.ql
index c33a7d238..1e95311fe 100644
--- a/examples/uboot_bin.ql
+++ b/examples/uboot_bin.ql
@@ -2,6 +2,7 @@
 ram_size = 0xa00000
 load_address = 0x80800000
 entry_point = 0x80800000
+heap_address = 0xa0000000
 heap_size = 0x300000
 
 
diff --git a/qiling/loader/blob.py b/qiling/loader/blob.py
index f17b80a9d..728443391 100644
--- a/qiling/loader/blob.py
+++ b/qiling/loader/blob.py
@@ -5,7 +5,6 @@
 
 from qiling import Qiling
 from qiling.loader.loader import QlLoader, Image
-from qiling.os.memory import QlMemoryHeap
 
 
 class QlLoaderBLOB(QlLoader):
@@ -28,11 +27,5 @@ def run(self):
         # allow image-related functionalities
         self.images.append(Image(code_begins, code_ends, 'blob_code'))
 
-        # FIXME: heap starts above end of ram??
-        # FIXME: heap should be allocated by OS, not loader
-        heap_base = code_ends
-        heap_size = int(self.ql.os.profile.get("CODE", "heap_size"), 16)
-        self.ql.os.heap = QlMemoryHeap(self.ql, heap_base, heap_base + heap_size)
-
         # FIXME: stack pointer should be a configurable profile setting
         self.ql.arch.regs.arch_sp = code_ends - 0x1000
diff --git a/qiling/os/blob/blob.py b/qiling/os/blob/blob.py
index e4a022562..af52fa74a 100644
--- a/qiling/os/blob/blob.py
+++ b/qiling/os/blob/blob.py
@@ -8,6 +8,7 @@
 from qiling.const import QL_ARCH, QL_OS
 from qiling.os.fcall import QlFunctionCall
 from qiling.os.os import QlOs
+from qiling.os.memory import QlMemoryHeap
 
 
 class QlOsBlob(QlOs):
@@ -49,5 +50,11 @@ def run(self):
         # if exit point was set explicitly, override the default one
         if self.ql.exit_point is not None:
             self.exit_point = self.ql.exit_point
-
+        
+        # if heap info is provided in profile, create heap
+        heap_base = self.profile.getint('CODE', 'heap_address', fallback=None)
+        heap_size = self.profile.getint('CODE', 'heap_size', fallback=None)
+        if heap_base is not None and heap_size is not None:
+            self.heap = QlMemoryHeap(self.ql, heap_base, heap_base + heap_size)
+        
         self.ql.emu_start(self.entry_point, self.exit_point, self.ql.timeout, self.ql.count)
diff --git a/tests/profiles/blob_raw.ql b/tests/profiles/blob_raw.ql
new file mode 100644
index 000000000..23390130a
--- /dev/null
+++ b/tests/profiles/blob_raw.ql
@@ -0,0 +1,4 @@
+[CODE]
+load_address = 0x10000000
+entry_point = 0x10000008
+ram_size = 0xa00000
\ No newline at end of file
diff --git a/tests/profiles/uboot_bin.ql b/tests/profiles/uboot_bin.ql
index c33a7d238..1e95311fe 100644
--- a/tests/profiles/uboot_bin.ql
+++ b/tests/profiles/uboot_bin.ql
@@ -2,6 +2,7 @@
 ram_size = 0xa00000
 load_address = 0x80800000
 entry_point = 0x80800000
+heap_address = 0xa0000000
 heap_size = 0x300000
 
 
diff --git a/tests/test_blob.py b/tests/test_blob.py
index 33e35751a..0bd9a6629 100644
--- a/tests/test_blob.py
+++ b/tests/test_blob.py
@@ -82,6 +82,67 @@ def partial_run_init(ql: Qiling):
 
         del ql
 
+    def test_blob_raw(self):
+        def run_checksum_emu(input_data_buffer: bytes) -> int:
+            """Emulate the blob's checksum routine on the buffer and return r0."""
+            BASE_ADDRESS = 0x10000000
+            CHECKSUM_FUNC_ADDR = BASE_ADDRESS + 0x8
+            END_ADDRESS = 0x100000ba
+            DATA_ADDR = 0xa0000000
+            STACK_ADDR = 0xb0000000
+
+            with open("../examples/rootfs/blob/example_raw.bin", "rb") as f:
+                raw_code = f.read()
+
+            ql = Qiling(code=raw_code, archtype=QL_ARCH.ARM, ostype=QL_OS.BLOB, profile="profiles/blob_raw.ql", verbose=QL_VERBOSE.DEBUG, thumb=True)
+
+            input_data_len = len(input_data_buffer)
+
+            # Map memory for data and stack
+            ql.mem.map(STACK_ADDR, 0x2000)
+            ql.mem.map(DATA_ADDR, ql.mem.align_up(input_data_len + 0x100))
+
+            # Write input data
+            ql.mem.write(DATA_ADDR, input_data_buffer)
+
+            # Set up registers
+            ql.arch.regs.sp = STACK_ADDR + 0x2000 - 4
+            ql.arch.regs.r0 = DATA_ADDR
+            ql.arch.regs.r1 = input_data_len
+            ql.arch.regs.pc = CHECKSUM_FUNC_ADDR
+            ql.arch.regs.lr = 0xbebebebe
+
+            ql.run(begin=CHECKSUM_FUNC_ADDR, end=END_ADDRESS)
+            result = ql.arch.regs.r0
+
+            del ql
+            return result
+
+        def calculate_expected_checksum(input_data_buffer: bytes) -> int:
+            """Python implementation of the expected checksum calculation."""
+            input_data_len = len(input_data_buffer)
+            expected_checksum = 0
+
+            if input_data_len >= 1 and input_data_buffer[0] == 0xDE:  # MAGIC_VALUE_1
+                for i in range(min(input_data_len, 4)):
+                    expected_checksum += input_data_buffer[i]
+                expected_checksum += 0x10
+            elif input_data_len >= 2 and input_data_buffer[1] == 0xAD:  # MAGIC_VALUE_2
+                for i in range(input_data_len):
+                    expected_checksum ^= input_data_buffer[i]
+                expected_checksum += 0x20
+            else:
+                for i in range(input_data_len):
+                    expected_checksum += input_data_buffer[i]
+
+            return expected_checksum & 0xFF
+
+        # one input per branch: default sum, MAGIC_VALUE_1 first, MAGIC_VALUE_2 second
+        test_inputs = (b"\x01\x02\x03\x04\x05", b"\xde\x02\x03\x04\x05", b"\x01\xad\x03\x04\x05")
+        for test_input in test_inputs:
+            with self.subTest(test_input=test_input.hex()):
+                self.assertEqual(run_checksum_emu(test_input), calculate_expected_checksum(test_input))
+
 
 if __name__ == "__main__":
     unittest.main()