From 884faebb05a965b62fc910733395edce974bbf9e Mon Sep 17 00:00:00 2001 From: Daniel Szoke Date: Thu, 27 Mar 2025 16:00:13 +0100 Subject: [PATCH] fix(sourcemaps): Avoid associating only sourcemap with all minified sources Remove the branch from `guess_sourcemap_reference` which handles the case of there only being one sourcemap. If there are multiple minified souces, they would all (erroneously) end up associated with the same single sourcemap. Also, since code for uploading bundles was relying on this branch (specifically when unpacking bundles), refactor so that we use the sourcemap which is passed to the command directly, rather thna "guessing" it. Fixes #2438 Fixes #2503 --- src/commands/react_native/gradle.rs | 12 +- src/commands/sourcemaps/upload.rs | 14 +- src/utils/file_upload.rs | 73 +++++++ src/utils/sourcemaps.rs | 183 +++++------------- .../sourcemaps-upload-file-ram-bundle.trycmd | 2 +- ...ourcemaps-upload-indexed-ram-bundle.trycmd | 2 +- 6 files changed, 134 insertions(+), 152 deletions(-) diff --git a/src/commands/react_native/gradle.rs b/src/commands/react_native/gradle.rs index faf419290e..91d2a27559 100644 --- a/src/commands/react_native/gradle.rs +++ b/src/commands/react_native/gradle.rs @@ -12,7 +12,7 @@ use crate::config::Config; use crate::constants::DEFAULT_MAX_WAIT; use crate::utils::args::{validate_distribution, ArgExt}; use crate::utils::file_search::ReleaseFileSearch; -use crate::utils::file_upload::UploadContext; +use crate::utils::file_upload::{SourceFile, UploadContext}; use crate::utils::sourcemaps::SourceMapProcessor; pub fn make_command(command: Command) -> Command { @@ -92,16 +92,16 @@ pub fn execute(matches: &ArgMatches) -> Result<()> { &bundle_url, ReleaseFileSearch::collect_file(bundle_path.clone())?, ); - processor.add( - &sourcemap_url, - ReleaseFileSearch::collect_file(sourcemap_path)?, - ); + + let sourcemap_match = ReleaseFileSearch::collect_file(sourcemap_path.clone())?; if let Ok(ram_bundle) = RamBundle::parse_unbundle_from_path(&bundle_path) { debug!("File RAM bundle found, extracting its contents..."); - processor.unpack_ram_bundle(&ram_bundle, &bundle_url)?; + let sourcemap_source = SourceFile::from_release_file_match(&sourcemap_url, sourcemap_match); + processor.unpack_ram_bundle(&ram_bundle, &bundle_url, &sourcemap_source)?; } else { debug!("Non-file bundle found"); + processor.add(&sourcemap_url, sourcemap_match); } processor.rewrite(&[base.to_str().unwrap()])?; diff --git a/src/commands/sourcemaps/upload.rs b/src/commands/sourcemaps/upload.rs index b1ff75b9fb..7536d42dda 100644 --- a/src/commands/sourcemaps/upload.rs +++ b/src/commands/sourcemaps/upload.rs @@ -13,7 +13,7 @@ use crate::config::Config; use crate::constants::DEFAULT_MAX_WAIT; use crate::utils::args::validate_distribution; use crate::utils::file_search::ReleaseFileSearch; -use crate::utils::file_upload::UploadContext; +use crate::utils::file_upload::{SourceFile, UploadContext}; use crate::utils::fs::path_as_url; use crate::utils::sourcemaps::SourceMapProcessor; @@ -303,21 +303,23 @@ fn process_sources_from_bundle( &bundle_url, ReleaseFileSearch::collect_file(bundle_path.clone())?, ); - processor.add( - &sourcemap_url, - ReleaseFileSearch::collect_file(sourcemap_path)?, - ); + let sourcemap_match = ReleaseFileSearch::collect_file(sourcemap_path)?; if let Ok(ram_bundle) = sourcemap::ram_bundle::RamBundle::parse_unbundle_from_path(&bundle_path) { debug!("File RAM bundle found, extracting its contents..."); // For file ("unbundle") RAM bundles we need to explicitly unpack it, otherwise we cannot detect it // reliably inside "processor.rewrite()" - processor.unpack_ram_bundle(&ram_bundle, &bundle_url)?; + + let sourcemap_source = SourceFile::from_release_file_match(&sourcemap_url, sourcemap_match); + processor.unpack_ram_bundle(&ram_bundle, &bundle_url, &sourcemap_source)?; } else if sourcemap::ram_bundle::RamBundle::parse_indexed_from_path(&bundle_path).is_ok() { debug!("Indexed RAM bundle found"); + let sourcemap_source = SourceFile::from_release_file_match(&sourcemap_url, sourcemap_match); + processor.unpack_indexed_ram_bundles(&sourcemap_source)?; } else { warn!("Regular bundle found"); + processor.add(&sourcemap_url, sourcemap_match); } let mut prefixes = get_prefixes_from_args(matches); diff --git a/src/utils/file_upload.rs b/src/utils/file_upload.rs index ee4d3edb50..c00ae705d2 100644 --- a/src/utils/file_upload.rs +++ b/src/utils/file_upload.rs @@ -1,5 +1,6 @@ //! Searches, processes and uploads release files. use std::collections::{BTreeMap, HashMap}; +use std::ffi::OsStr; use std::fmt::{self, Display}; use std::io::BufWriter; use std::path::PathBuf; @@ -16,6 +17,7 @@ use rayon::ThreadPoolBuilder; use sentry::types::DebugId; use sha1_smol::Digest; use symbolic::common::ByteView; +use symbolic::debuginfo::js; use symbolic::debuginfo::sourcebundle::{ SourceBundleErrorKind, SourceBundleWriter, SourceFileInfo, SourceFileType, }; @@ -29,6 +31,8 @@ use crate::utils::chunks::{upload_chunks, Chunk, ASSEMBLE_POLL_INTERVAL}; use crate::utils::fs::{get_sha1_checksum, get_sha1_checksums, TempFile}; use crate::utils::progress::{ProgressBar, ProgressBarMode, ProgressStyle}; +use super::file_search::ReleaseFileMatch; + /// Fallback concurrency for release file uploads. static DEFAULT_CONCURRENCY: usize = 4; @@ -239,6 +243,68 @@ pub struct SourceFile { } impl SourceFile { + pub fn from_release_file_match(url: &str, mut file: ReleaseFileMatch) -> SourceFile { + let (ty, debug_id) = if sourcemap::is_sourcemap_slice(&file.contents) { + ( + SourceFileType::SourceMap, + std::str::from_utf8(&file.contents) + .ok() + .and_then(js::discover_sourcemap_embedded_debug_id), + ) + } else if file + .path + .file_name() + .and_then(OsStr::to_str) + .map(|x| x.ends_with("bundle")) + .unwrap_or(false) + && sourcemap::ram_bundle::is_ram_bundle_slice(&file.contents) + { + (SourceFileType::IndexedRamBundle, None) + } else if is_hermes_bytecode(&file.contents) { + // This is actually a big hack: + // For the react-native Hermes case, we skip uploading the bytecode bundle, + // and rather flag it as an empty "minified source". That way, it + // will get a SourceMap reference, and the server side processor + // should deal with it accordingly. + file.contents.clear(); + (SourceFileType::MinifiedSource, None) + } else { + // Here, we use MinifiedSource for historical reasons. We used to guess whether + // a JS file was a minified file or a source file, and we would treat these files + // differently when uploading or injecting them. However, the desired behavior is + // and has always been to treat all JS files the same, since users should be + // responsible for providing the file paths for only files they would like to have + // uploaded or injected. The minified file guessing furthermore was not reliable, + // since minification is not a necessary step in the JS build process. + // + // We use MinifiedSource here rather than Source because we want to treat all JS + // files the way we used to treat minified files only. To use Source, we would need + // to analyze all possible code paths that check this value, and update those as + // well. To keep the change minimal, we use MinifiedSource here. + ( + SourceFileType::MinifiedSource, + std::str::from_utf8(&file.contents) + .ok() + .and_then(js::discover_debug_id), + ) + }; + + let mut source_file = SourceFile { + url: url.into(), + path: file.path, + contents: file.contents.into(), + ty, + headers: BTreeMap::new(), + messages: vec![], + already_uploaded: false, + }; + + if let Some(debug_id) = debug_id { + source_file.set_debug_id(debug_id.to_string()); + } + source_file + } + /// Calculates and returns the SHA1 checksum of the file. pub fn checksum(&self) -> Result { get_sha1_checksum(&**self.contents) @@ -753,6 +819,13 @@ fn print_upload_context_details(context: &UploadContext) { ); } +fn is_hermes_bytecode(slice: &[u8]) -> bool { + // The hermes bytecode format magic is defined here: + // https://github.com/facebook/hermes/blob/5243222ef1d92b7393d00599fc5cff01d189a88a/include/hermes/BCGen/HBC/BytecodeFileFormat.h#L24-L25 + const HERMES_MAGIC: [u8; 8] = [0xC6, 0x1F, 0xBC, 0x03, 0xC1, 0x03, 0x19, 0x1F]; + slice.starts_with(&HERMES_MAGIC) +} + #[cfg(test)] mod tests { use sha1_smol::Sha1; diff --git a/src/utils/sourcemaps.rs b/src/utils/sourcemaps.rs index 7bd66ef1ba..dc3c47c3da 100644 --- a/src/utils/sourcemaps.rs +++ b/src/utils/sourcemaps.rs @@ -1,6 +1,5 @@ //! Provides sourcemap validation functionality. use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; -use std::ffi::OsStr; use std::io::Write; use std::mem; use std::path::{Path, PathBuf}; @@ -15,9 +14,7 @@ use log::{debug, info, warn}; use sentry::types::DebugId; use sha1_smol::Digest; use sourcemap::{DecodedMap, SourceMap}; -use symbolic::debuginfo::js::{ - discover_debug_id, discover_sourcemap_embedded_debug_id, discover_sourcemaps_location, -}; +use symbolic::debuginfo::js::discover_sourcemaps_location; use symbolic::debuginfo::sourcebundle::SourceFileType; use url::Url; @@ -111,49 +108,40 @@ fn guess_sourcemap_reference( sourcemaps: &HashSet, min_url: &str, ) -> Result { - // if there is only one sourcemap in total we just assume that's the one. - // We just need to make sure that we fix up the reference if we need to - // (eg: ~/ -> /). - if sourcemaps.len() == 1 { - let original_url = sourcemaps.iter().next().unwrap(); - return Ok(SourceMapReference { - url: sourcemap::make_relative_path(min_url, original_url), - original_url: Option::from(original_url.to_string()), - }); - } - let map_ext = "map"; let (path, basename, ext) = split_url(min_url); // foo.min.js -> foo.map - if sourcemaps.contains(&unsplit_url(path, basename, Some("map"))) { - return Ok(SourceMapReference::from_url(unsplit_url( - None, - basename, - Some("map"), - ))); + let url_with_path = unsplit_url(path, basename, Some("map")); + if sourcemaps.contains(&url_with_path) { + return Ok( + SourceMapReference::from_url(unsplit_url(None, basename, Some("map"))) + .with_original_url(url_with_path), + ); } if let Some(ext) = ext.as_ref() { // foo.min.js -> foo.min.js.map let new_ext = format!("{ext}.{map_ext}"); - if sourcemaps.contains(&unsplit_url(path, basename, Some(&new_ext))) { - return Ok(SourceMapReference::from_url(unsplit_url( - None, - basename, - Some(&new_ext), - ))); + let url_with_path = unsplit_url(path, basename, Some(&new_ext)); + if sourcemaps.contains(&url_with_path) { + return Ok( + SourceMapReference::from_url(unsplit_url(None, basename, Some(&new_ext))) + .with_original_url(url_with_path), + ); } // foo.min.js -> foo.js.map if let Some(rest) = ext.strip_prefix("min.") { let new_ext = format!("{rest}.{map_ext}"); - if sourcemaps.contains(&unsplit_url(path, basename, Some(&new_ext))) { + let url_with_path = unsplit_url(path, basename, Some(&new_ext)); + if sourcemaps.contains(&url_with_path) { return Ok(SourceMapReference::from_url(unsplit_url( None, basename, Some(&new_ext), - ))); + )) + .with_original_url(url_with_path)); } } @@ -163,12 +151,14 @@ fn guess_sourcemap_reference( let parts_len = parts.len(); parts[parts_len - 1] = map_ext; let new_ext = parts.join("."); - if sourcemaps.contains(&unsplit_url(path, basename, Some(&new_ext))) { + let url_with_path = unsplit_url(path, basename, Some(&new_ext)); + if sourcemaps.contains(&url_with_path) { return Ok(SourceMapReference::from_url(unsplit_url( None, basename, Some(&new_ext), - ))); + )) + .with_original_url(url_with_path)); } } } @@ -192,6 +182,11 @@ impl SourceMapReference { original_url: None, } } + + fn with_original_url(mut self, original_url: String) -> Self { + self.original_url = Some(original_url); + self + } } pub struct SourceMapProcessor { @@ -201,13 +196,6 @@ pub struct SourceMapProcessor { debug_ids: HashMap, } -fn is_hermes_bytecode(slice: &[u8]) -> bool { - // The hermes bytecode format magic is defined here: - // https://github.com/facebook/hermes/blob/5243222ef1d92b7393d00599fc5cff01d189a88a/include/hermes/BCGen/HBC/BytecodeFileFormat.h#L24-L25 - const HERMES_MAGIC: [u8; 8] = [0xC6, 0x1F, 0xBC, 0x03, 0xC1, 0x03, 0x19, 0x1F]; - slice.starts_with(&HERMES_MAGIC) -} - fn url_matches_extension(url: &str, extensions: &[&str]) -> bool { if extensions.is_empty() { return true; @@ -275,75 +263,25 @@ impl SourceMapProcessor { style(">").dim(), style(self.pending_sources.len()).yellow() ); - for (url, mut file) in self.pending_sources.drain() { + for (url, file) in mem::take(&mut self.pending_sources) { pb.set_message(&url); - - let (ty, debug_id) = if sourcemap::is_sourcemap_slice(&file.contents) { - ( - SourceFileType::SourceMap, - std::str::from_utf8(&file.contents) - .ok() - .and_then(discover_sourcemap_embedded_debug_id), - ) - } else if file - .path - .file_name() - .and_then(OsStr::to_str) - .map(|x| x.ends_with("bundle")) - .unwrap_or(false) - && sourcemap::ram_bundle::is_ram_bundle_slice(&file.contents) - { - (SourceFileType::IndexedRamBundle, None) - } else if is_hermes_bytecode(&file.contents) { - // This is actually a big hack: - // For the react-native Hermes case, we skip uploading the bytecode bundle, - // and rather flag it as an empty "minified source". That way, it - // will get a SourceMap reference, and the server side processor - // should deal with it accordingly. - file.contents.clear(); - (SourceFileType::MinifiedSource, None) - } else { - // Here, we use MinifiedSource for historical reasons. We used to guess whether - // a JS file was a minified file or a source file, and we would treat these files - // differently when uploading or injecting them. However, the desired behavior is - // and has always been to treat all JS files the same, since users should be - // responsible for providing the file paths for only files they would like to have - // uploaded or injected. The minified file guessing furthermore was not reliable, - // since minification is not a necessary step in the JS build process. - // - // We use MinifiedSource here rather than Source because we want to treat all JS - // files the way we used to treat minified files only. To use Source, we would need - // to analyze all possible code paths that check this value, and update those as - // well. To keep the change minimal, we use MinifiedSource here. - ( - SourceFileType::MinifiedSource, - std::str::from_utf8(&file.contents) - .ok() - .and_then(discover_debug_id), - ) - }; - - let mut source_file = SourceFile { - url: url.clone(), - path: file.path, - contents: file.contents.into(), - ty, - headers: BTreeMap::new(), - messages: vec![], - already_uploaded: false, - }; - - if let Some(debug_id) = debug_id { - source_file.set_debug_id(debug_id.to_string()); - self.debug_ids.insert(url.clone(), debug_id); - } - - self.sources.insert(url.clone(), source_file); + self.add_file_to_sources(SourceFile::from_release_file_match(&url, file)); pb.inc(1); } pb.finish_with_duration("Analyzing"); } + /// Adds a given source_file to sources, also inserting its debug_id if present + /// and parsable as a DebugId. + fn add_file_to_sources(&mut self, source_file: SourceFile) { + let source_file_url = source_file.url.clone(); + if let Some(debug_id) = source_file.debug_id().and_then(|id| id.parse().ok()) { + self.debug_ids.insert(source_file_url.clone(), debug_id); + } + + self.sources.insert(source_file_url, source_file); + } + /// Collect references to sourcemaps in minified source files /// and saves them in `self.sourcemap_references`. fn collect_sourcemap_references(&mut self) { @@ -510,43 +448,12 @@ impl SourceMapProcessor { &mut self, ram_bundle: &sourcemap::ram_bundle::RamBundle, bundle_source_url: &str, + sourcemap_source: &SourceFile, ) -> Result<()> { // We need this to flush all pending sourcemaps self.flush_pending_sources(); - debug!("Trying to guess the sourcemap reference"); - let sourcemaps_references = self - .sources - .values() - .filter(|x| x.ty == SourceFileType::SourceMap) - .map(|x| x.url.to_string()) - .collect(); - - let sourcemap_url = - match guess_sourcemap_reference(&sourcemaps_references, bundle_source_url) { - Ok(filename) => { - let (path, _, _) = split_url(bundle_source_url); - unsplit_url(path, &filename.url, None) - } - Err(_) => { - warn!("Sourcemap reference for {} not found!", bundle_source_url); - return Ok(()); - } - }; - debug!( - "Sourcemap reference for {} found: {}", - bundle_source_url, sourcemap_url - ); - - let Some(sourcemap_source) = self.sources.get(&sourcemap_url) else { - warn!( - "Cannot find the sourcemap for the RAM bundle using the URL: {}, skipping", - sourcemap_url - ); - return Ok(()); - }; let sourcemap_content = &sourcemap_source.contents; - let sourcemap_index = match sourcemap::decode_slice(sourcemap_content)? { sourcemap::DecodedMap::Index(sourcemap_index) => sourcemap_index, _ => { @@ -565,7 +472,7 @@ impl SourceMapProcessor { let mut index_sourcemap_content: Vec = vec![]; index_section.to_writer(&mut index_sourcemap_content)?; self.sources.insert( - sourcemap_url.clone(), + sourcemap_source.url.clone(), SourceFile { url: sourcemap_source.url.clone(), path: sourcemap_source.path.clone(), @@ -620,7 +527,9 @@ impl SourceMapProcessor { } /// Replaces indexed RAM bundle entries with their expanded sources and sourcemaps - pub fn unpack_indexed_ram_bundles(&mut self) -> Result<()> { + pub fn unpack_indexed_ram_bundles(&mut self, sourcemap_source: &SourceFile) -> Result<()> { + self.flush_pending_sources(); + let mut ram_bundles = Vec::new(); // Drain RAM bundles from self.sources @@ -637,7 +546,7 @@ impl SourceMapProcessor { let ram_bundle = sourcemap::ram_bundle::RamBundle::parse_indexed_from_slice( &bundle_source.contents, )?; - self.unpack_ram_bundle(&ram_bundle, &bundle_source.url)?; + self.unpack_ram_bundle(&ram_bundle, &bundle_source.url, sourcemap_source)?; } Ok(()) } @@ -650,8 +559,6 @@ impl SourceMapProcessor { println!("{} Rewriting sources", style(">").dim()); - self.unpack_indexed_ram_bundles()?; - let progress_style = ProgressStyle::default_bar().template(&format!( "{} {{msg}}\n{{wide_bar}} {{pos}}/{{len}}", style(">").cyan() diff --git a/tests/integration/_cases/sourcemaps/sourcemaps-upload-file-ram-bundle.trycmd b/tests/integration/_cases/sourcemaps/sourcemaps-upload-file-ram-bundle.trycmd index 78a369e5b6..c62cd8a7c3 100644 --- a/tests/integration/_cases/sourcemaps/sourcemaps-upload-file-ram-bundle.trycmd +++ b/tests/integration/_cases/sourcemaps/sourcemaps-upload-file-ram-bundle.trycmd @@ -1,7 +1,7 @@ ``` $ sentry-cli sourcemaps upload --bundle tests/integration/_fixtures/file-ram-bundle/index.android.bundle --bundle-sourcemap tests/integration/_fixtures/file-ram-bundle/index.android.bundle.packager.map --release=wat-release ? success -> Analyzing 2 sources +> Analyzing 1 sources > Rewriting sources > Adding source map references > Bundled 14 files for upload diff --git a/tests/integration/_cases/sourcemaps/sourcemaps-upload-indexed-ram-bundle.trycmd b/tests/integration/_cases/sourcemaps/sourcemaps-upload-indexed-ram-bundle.trycmd index 4f0a6e9ff3..613e717b43 100644 --- a/tests/integration/_cases/sourcemaps/sourcemaps-upload-indexed-ram-bundle.trycmd +++ b/tests/integration/_cases/sourcemaps/sourcemaps-upload-indexed-ram-bundle.trycmd @@ -1,7 +1,7 @@ ``` $ sentry-cli sourcemaps upload --bundle tests/integration/_fixtures/indexed-ram-bundle/main.jsbundle --bundle-sourcemap tests/integration/_fixtures/indexed-ram-bundle/main.jsbundle.packager.map --release=wat-release ? success -> Analyzing 2 sources +> Analyzing 1 sources > Rewriting sources > Adding source map references > Bundled 2107 files for upload