diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0f6b57e0a..c5962c6c8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Changelog
 
+## Unreleased
+
+**Improvements**
+
+- Check UTF-8 validity memory efficiently ([#890](https://github.com/getsentry/symbolic/pull/890))
+
 ## 12.13.2
 
 **Fixes**
diff --git a/Cargo.lock b/Cargo.lock
index 4d55e2799..1f1a8b8b9 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -227,6 +227,21 @@ version = "0.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "597bb81c80a54b6a4381b23faba8d7774b144c94cbd1d6fe3f1329bd776554ab"
 
+[[package]]
+name = "bit-set"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
+dependencies = [
+ "bit-vec",
+]
+
+[[package]]
+name = "bit-vec"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
+
 [[package]]
 name = "bitflags"
 version = "1.3.2"
@@ -295,6 +310,12 @@ version = "3.16.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
 
+[[package]]
+name = "byteorder"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
+
 [[package]]
 name = "bytes"
 version = "1.9.0"
@@ -693,6 +714,12 @@ dependencies = [
  "miniz_oxide",
 ]
 
+[[package]]
+name = "fnv"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
+
 [[package]]
 name = "form_urlencoded"
 version = "1.2.1"
@@ -1646,6 +1673,15 @@ version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
 
+[[package]]
+name = "ppv-lite86"
+version = "0.2.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04"
+dependencies = [
+ "zerocopy",
+]
+
 [[package]]
 name = "precomputed-hash"
 version = "0.1.1"
@@ -1683,6 +1719,26 @@ dependencies = [
  "watto",
 ]
 
+[[package]]
+name = "proptest"
+version = "1.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "14cae93065090804185d3b75f0bf93b8eeda30c7a9b4a33d3bdb3988d6229e50"
+dependencies = [
+ "bit-set",
+ "bit-vec",
+ "bitflags 2.7.0",
+ "lazy_static",
+ "num-traits",
+ "rand",
+ "rand_chacha",
+ "rand_xorshift",
+ "regex-syntax 0.8.5",
+ "rusty-fork",
+ "tempfile",
+ "unarray",
+]
+
 [[package]]
 name = "psm"
 version = "0.1.24"
@@ -1692,6 +1748,12 @@ dependencies = [
  "cc",
 ]
 
+[[package]]
+name = "quick-error"
+version = "1.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
+
 [[package]]
 name = "quote"
 version = "1.0.38"
@@ -1713,6 +1775,18 @@ version = "0.8.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
 dependencies = [
+ "libc",
+ "rand_chacha",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
+dependencies = [
+ "ppv-lite86",
  "rand_core",
 ]
 
@@ -1721,6 +1795,18 @@ name = "rand_core"
 version = "0.6.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
+dependencies = [
+ "getrandom",
+]
+
+[[package]]
+name = "rand_xorshift"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f"
+dependencies = [
+ "rand_core",
+]
 
 [[package]]
 name = "range-collections"
@@ -1849,6 +1935,18 @@ dependencies = [
  "windows-sys 0.59.0",
 ]
 
+[[package]]
+name = "rusty-fork"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f"
+dependencies = [
+ "fnv",
+ "quick-error",
+ "tempfile",
+ "wait-timeout",
+]
+
 [[package]]
 name = "ryu"
 version = "1.0.18"
@@ -2381,6 +2479,7 @@ dependencies = [
  "once_cell",
  "parking_lot",
  "pdb-addr2line",
+ "proptest",
  "regex",
  "scroll 0.12.0",
  "serde",
@@ -2741,6 +2840,12 @@ version = "2.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a"
 
+[[package]]
+name = "unarray"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94"
+
 [[package]]
 name = "unicode-id-start"
 version = "1.0.4"
@@ -2823,6 +2928,15 @@ version = "0.9.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
 
+[[package]]
+name = "wait-timeout"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "walkdir"
 version = "2.5.0"
@@ -3191,6 +3305,7 @@ version = "0.7.35"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
 dependencies = [
+ "byteorder",
  "zerocopy-derive",
 ]
 
diff --git a/Cargo.toml b/Cargo.toml
index 82346d281..e8f17957d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -41,6 +41,7 @@ once_cell = "1.17.1"
 parking_lot = "0.12.1"
 pdb-addr2line = "0.10.4"
 proguard = { version = "5.4.0", features = ["uuid"] }
+proptest = "1.6.0"
 regex = "1.7.1"
 rustc-demangle = "0.1.21"
 # keep this in sync with whatever version `goblin` uses
diff --git a/symbolic-debuginfo/Cargo.toml b/symbolic-debuginfo/Cargo.toml
index 2f08e1d32..f20ee6472 100644
--- a/symbolic-debuginfo/Cargo.toml
+++ b/symbolic-debuginfo/Cargo.toml
@@ -115,6 +115,7 @@ zstd = { workspace = true, optional = true }
 [dev-dependencies]
 criterion = { workspace = true }
 insta = { workspace = true }
+proptest = {workspace = true }
 tempfile = { workspace = true }
 similar-asserts = { workspace = true }
 symbolic-testutils = { path = "../symbolic-testutils" }
diff --git a/symbolic-debuginfo/src/sourcebundle.rs b/symbolic-debuginfo/src/sourcebundle/mod.rs
similarity index 98%
rename from symbolic-debuginfo/src/sourcebundle.rs
rename to symbolic-debuginfo/src/sourcebundle/mod.rs
index 1dbce62c7..01bbac277 100644
--- a/symbolic-debuginfo/src/sourcebundle.rs
+++ b/symbolic-debuginfo/src/sourcebundle/mod.rs
@@ -42,15 +42,17 @@
 //! bundle a file entry has a `url` and might carry `headers` or individual debug IDs
 //! per source file.
 
+mod utf8_reader;
+
 use std::borrow::Cow;
 use std::collections::{BTreeMap, BTreeSet, HashMap};
 use std::error::Error;
-use std::fmt;
 use std::fmt::{Display, Formatter};
 use std::fs::{File, OpenOptions};
-use std::io::{BufReader, BufWriter, Read, Seek, Write};
+use std::io::{BufReader, BufWriter, ErrorKind, Read, Seek, Write};
 use std::path::Path;
 use std::sync::Arc;
+use std::{fmt, io};
 
 use parking_lot::Mutex;
 use regex::Regex;
@@ -60,6 +62,7 @@ use zip::{write::SimpleFileOptions, ZipWriter};
 
 use symbolic_common::{Arch, AsSelf, CodeId, DebugId, SourceLinkMappings};
 
+use self::utf8_reader::Utf8Reader;
 use crate::base::*;
 use crate::js::{
     discover_debug_id, discover_sourcemap_embedded_debug_id, discover_sourcemaps_location,
@@ -1217,18 +1220,14 @@ where
     pub fn add_file<S, R>(
         &mut self,
         path: S,
-        mut file: R,
+        file: R,
         info: SourceFileInfo,
     ) -> Result<(), SourceBundleError>
     where
         S: AsRef<str>,
         R: Read,
     {
-        let mut buf = String::new();
-
-        if let Err(e) = file.read_to_string(&mut buf) {
-            return Err(SourceBundleError::new(SourceBundleErrorKind::ReadFailed, e));
-        }
+        let mut file_reader = Utf8Reader::new(file);
 
         let full_path = self.file_path(path.as_ref());
         let unique_path = self.unique_path(full_path);
@@ -1236,12 +1235,26 @@ where
         self.writer
             .start_file(unique_path.clone(), default_file_options())
             .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::WriteFailed, e))?;
-        self.writer
-            .write_all(buf.as_bytes())
-            .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::WriteFailed, e))?;
 
-        self.manifest.files.insert(unique_path, info);
-        Ok(())
+        match io::copy(&mut file_reader, &mut self.writer) {
+            Err(e) => {
+                self.writer
+                    .abort_file()
+                    .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::WriteFailed, e))?;
+
+                // ErrorKind::InvalidData is returned by Utf8Reader when the file is not valid UTF-8.
+                let error_kind = match e.kind() {
+                    ErrorKind::InvalidData => SourceBundleErrorKind::ReadFailed,
+                    _ => SourceBundleErrorKind::WriteFailed,
+                };
+
+                Err(SourceBundleError::new(error_kind, e))
+            }
+            Ok(_) => {
+                self.manifest.files.insert(unique_path, info);
+                Ok(())
+            }
+        }
     }
 
     /// Calls add_file, and handles any ReadFailed errors by calling the skipped_file_callback.
diff --git a/symbolic-debuginfo/src/sourcebundle/utf8_reader.rs b/symbolic-debuginfo/src/sourcebundle/utf8_reader.rs
new file mode 100644
index 000000000..64713814e
--- /dev/null
+++ b/symbolic-debuginfo/src/sourcebundle/utf8_reader.rs
@@ -0,0 +1,306 @@
+//! UTF-8 reader used by the sourcebundle module to read files.
+
+use std::cmp;
+use std::io::{Error, ErrorKind, Read, Result};
+use std::str;
+
+use thiserror::Error;
+
+const MAX_UTF8_SEQUENCE_SIZE: usize = 4;
+
+#[derive(Debug, Error)]
+#[error("Invalid UTF-8 sequence")]
+pub(crate) struct UTF8ReaderError;
+
+pub struct Utf8Reader<R> {
+    inner: R,
+
+    /// A buffer of `MAX_UTF8_SEQUENCE_SIZE` bytes, which we use when the end of a read is not a
+    /// valid UTF-8 sequence. We read into this buffer until we have a valid UTF-8 sequence, or
+    /// the reader is exhausted, or the buffer is full. Assuming we get a valid UTF-8 sequence,
+    /// we then on next read read from this buffer.
+    buffer: [u8; MAX_UTF8_SEQUENCE_SIZE],
+
+    /// The index of the first byte in the buffer that has not been read.
+    buffer_start: usize,
+
+    /// The index of the last byte in the buffer that is part of the UTF-8 sequence. We only read
+    /// up to this index, bytes after this index are discarded.
+    buffer_end: usize,
+}
+
+impl<R> Utf8Reader<R> {
+    pub fn new(inner: R) -> Self {
+        Self {
+            inner,
+            buffer: [0; MAX_UTF8_SEQUENCE_SIZE],
+            buffer_start: 0,
+            buffer_end: 0,
+        }
+    }
+
+    /// The slice of the buffer that should be read next.
+    fn buffer_to_read(&self) -> &[u8] {
+        &self.buffer[self.buffer_start..self.buffer_end]
+    }
+
+    /// Advances the buffer start index by the given amount.
+    fn advance(&mut self, amt: usize) {
+        self.buffer_start += amt;
+    }
+
+    /// Reads bytes from the buffer into the given buffer.
+    fn read_from_buffer(&mut self, buf: &mut [u8]) -> Result<usize> {
+        let bytes_copied = slice_copy(buf, self.buffer_to_read());
+        self.advance(bytes_copied);
+
+        Ok(bytes_copied)
+    }
+}
+
+impl<R> Utf8Reader<R>
+where
+    R: Read,
+{
+    /// Reads bytes from the inner reader into the given buffer.
+    ///
+    /// If a UTF-8 sequence only partially fits into the buffer, the remaining
+    /// bytes are written to `self.buffer`.
+    fn read_from_inner(&mut self, buf: &mut [u8]) -> Result<usize> {
+        let read_from_inner = self.inner.read(buf)?;
+        let read_portion = &buf[..read_from_inner];
+
+        let invalid_portion = ending_incomplete_utf8_sequence(read_portion)?;
+
+        slice_copy(&mut self.buffer, invalid_portion);
+        self.read_into_buffer_until_utf8(invalid_portion.len())?;
+
+        Ok(read_from_inner)
+    }
+
+    /// Reads bytes from the inner reader into self.buffer until the buffer contains a valid UTF-8
+    /// sequence.
+    ///
+    /// Before calling this method, self.buffer[..start_index] should contain the bytes that are
+    /// part of the incomplete UTF-8 sequence that we are trying to complete (or, it should be
+    /// empty, in which case, this function is a no-op)
+    ///
+    /// Then, we read one byte at a time from the inner reader into self.buffer, until we have a
+    /// valid UTF-8 sequence.
+    ///
+    /// Lastly, we set self.buffer_start to start_index (so we don't reread the bytes that started
+    /// the incomplete UTF-8 sequence) and self.buffer_end to the index of the last byte in the
+    /// buffer that is part of the UTF-8 sequence.
+    ///
+    /// The next read from the Utf8Reader will read from
+    /// self.buffer[self.buffer_start..self.buffer_end].
+    fn read_into_buffer_until_utf8(&mut self, start_index: usize) -> Result<()> {
+        let bytes_until_utf8 = read_until_utf8(&mut self.inner, &mut self.buffer, start_index)?;
+
+        self.buffer_start = start_index;
+        self.buffer_end = bytes_until_utf8;
+
+        Ok(())
+    }
+}
+
+impl<R> Read for Utf8Reader<R>
+where
+    R: Read,
+{
+    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
+        let read_from_buffer = self.read_from_buffer(buf)?;
+
+        // self.read_from_inner overwrites self.buffer, so we can only call it if we have read
+        // everything from the buffer.
+        let read_from_inner = match self.buffer_to_read() {
+            [] => self.read_from_inner(&mut buf[read_from_buffer..])?,
+            _ => 0,
+        };
+
+        Ok(read_from_buffer + read_from_inner)
+    }
+}
+
+/// Reads a single byte at a time from the inner reader into the buffer, starting from
+/// current_index, until either the buffer contains a valid UTF-8 sequence, the reader is
+/// exhausted, or the 4 bytes total have been read into the buffer (the maximum size of a
+/// UTF-8 sequence). The 4 bytes also includes bytes already in the buffer when the function is
+/// called, as indicated by the current_index parameter.
+///
+/// If the buffer is empty (i.e. current_index is 0), we will return Ok(0) without reading
+/// anything from the reader. An empty byte sequence is a valid UTF-8 sequence (the
+/// empty string).
+///
+/// Returns the number of bytes read into the buffer (including bytes already in the buffer
+/// when the function is called), or an error if the reader errors or if reading a valid UTF-8
+/// sequence is not possible.
+///
+/// The buffer must have a length of at least 4, otherwise this function can panic.
+fn read_until_utf8(
+    reader: &mut impl Read,
+    buffer: &mut [u8],
+    mut current_index: usize,
+) -> Result<usize> {
+    while str::from_utf8(&buffer[..current_index]).is_err() {
+        if current_index >= MAX_UTF8_SEQUENCE_SIZE
+            || reader.read(&mut buffer[current_index..current_index + 1])? == 0
+        {
+            // We already have 4 bytes in the buffer (maximum UTF-8 sequence size), or the stream
+            // has been exhausted without finding a valid UTF-8 sequence.
+            return Err(Error::new(ErrorKind::InvalidData, UTF8ReaderError));
+        }
+
+        current_index += 1;
+    }
+
+    Ok(current_index)
+}
+
+/// Returns the index of the first invalid UTF-8 sequence
+/// in the given bytes. If the sequence is valid, returns the
+/// length of the bytes.
+fn utf8_up_to(bytes: &[u8]) -> usize {
+    match str::from_utf8(bytes) {
+        Ok(_) => bytes.len(),
+        Err(e) => e.valid_up_to(),
+    }
+}
+
+/// Given a byte slice, determines if the slice ends in what might be an incomplete UTF-8 sequence,
+/// returning the incomplete sequence if so.
+///
+/// The following return values are possible:
+///   - Ok([]) if the byte slice is valid UTF-8, in its entirety.
+///   - Ok(incomplete_sequence) if the byte slice is valid up to the `incomplete_sequence` slice
+///     returned by this function. `incomplete_sequence` is always a slice of at most 3 bytes
+///     occurring at the end of the input slice. In this case, it might be possible to make the
+///     input slice a valid UTF-8 sequence by appending bytes to the input slice.
+///   - Err(e) if the first UTF-8 violation in the input slice occurs more than 3 bytes from the
+///     end of the input slice. In this case, it is definitely not possible to make the sequence
+///     valid by appending bytes to the input slice.
+fn ending_incomplete_utf8_sequence(bytes: &[u8]) -> Result<&[u8]> {
+    let valid_up_to = utf8_up_to(bytes);
+    let invalid_portion = &bytes[valid_up_to..];
+
+    if invalid_portion.len() >= MAX_UTF8_SEQUENCE_SIZE {
+        Err(Error::new(ErrorKind::InvalidData, UTF8ReaderError))
+    } else {
+        Ok(invalid_portion)
+    }
+}
+
+/// Copies as many elements as possible (i.e. the smaller of the two slice lengths) from the
+/// beginning of the source slice to the beginning of the destination slice, overwriting anything
+/// already in the destination slice.
+///
+/// Returns the number of elements copied.
+fn slice_copy<T>(dst: &mut [T], src: &[T]) -> usize
+where
+    T: Copy,
+{
+    let elements_to_copy = cmp::min(dst.len(), src.len());
+    dst[..elements_to_copy].copy_from_slice(&src[..elements_to_copy]);
+
+    elements_to_copy
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    use proptest::prelude::*;
+
+    use core::str;
+    use std::io::Cursor;
+
+    #[test]
+    fn test_read_empty() {
+        let mut empty_reader = Utf8Reader::new(Cursor::new(b""));
+
+        let mut buf = vec![];
+        empty_reader
+            .read_to_end(&mut buf)
+            .expect("read_to_end errored");
+
+        assert_eq!(buf, b"");
+    }
+
+    #[test]
+    fn test_read_ascii_simple() {
+        let mut reader = Utf8Reader::new(Cursor::new(b"Hello, world!"));
+
+        let mut buf = vec![];
+        reader.read_to_end(&mut buf).expect("read_to_end errored");
+
+        assert_eq!(buf, b"Hello, world!");
+    }
+
+    #[test]
+    fn test_read_utf8_simple() {
+        const HELLO_WORLD: &str = "你好世界！";
+        let mut reader = Utf8Reader::new(Cursor::new(HELLO_WORLD.as_bytes()));
+
+        let mut buf = vec![];
+        reader.read_to_end(&mut buf).expect("read_to_end errored");
+
+        assert_eq!(buf, HELLO_WORLD.as_bytes());
+    }
+
+    #[test]
+    fn small_reads_splitting_sequence() {
+        let mut reader = Utf8Reader::new(Cursor::new("🙂".as_bytes()));
+
+        let mut buf = [0; MAX_UTF8_SEQUENCE_SIZE];
+
+        for i in 0..MAX_UTF8_SEQUENCE_SIZE {
+            // Read at most one byte at a time.
+            let bytes_read = reader.read(&mut buf[i..i + 1]).expect("read errored");
+            assert_eq!(bytes_read, 1, "bytes read");
+        }
+
+        assert_eq!(&buf[..], "🙂".as_bytes());
+    }
+
+    #[test]
+    fn invalid_utf8_sequence() {
+        let mut reader = Utf8Reader::new(Cursor::new([0b11111111]));
+
+        let mut buf = [0; 1];
+        reader.read(&mut buf).expect_err("read should have errored");
+    }
+
+    #[test]
+    fn invalid_utf8_sequence_at_end_of_reader() {
+        let mut read_buffer = Vec::from(b"Hello, world!");
+
+        // Cutting off the last byte will invalidate the UTF-8 sequence.
+        let invalid_sequence = &"🙂".as_bytes()[..'🙂'.len_utf8() - 1];
+        read_buffer.extend(invalid_sequence);
+
+        let mut reader = Utf8Reader::new(Cursor::new(&read_buffer));
+        reader
+            .read_to_end(&mut vec![])
+            .expect_err("read should have errored");
+    }
+
+    proptest! {
+        #[test]
+        fn read_arbitrary_string(s in any::<String>()) {
+            let mut buf = vec![];
+            let mut reader = Utf8Reader::new(Cursor::new(&s));
+
+            reader.read_to_end(&mut buf).unwrap();
+            assert_eq!(str::from_utf8(&buf).unwrap(), s);
+        }
+
+        #[test]
+        fn dont_read_arbitrary_nonstring(s in any::<Vec<u8>>()) {
+            prop_assume!(str::from_utf8(&s).is_err());
+            let mut buf = vec![];
+            let mut reader = Utf8Reader::new(Cursor::new(&s));
+
+            reader.read_to_end(&mut buf).unwrap_err();
+        }
+    }
+}