diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index b05b9c1..6c36d5f 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -66,7 +66,7 @@ jobs:
         shell: bash
         run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.30.2/cargo-dist-installer.sh | sh"
       - name: Cache dist
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v5
         with:
           name: cargo-dist-cache
           path: ~/.cargo/bin/dist
@@ -82,7 +82,7 @@ jobs:
           cat plan-dist-manifest.json
           echo "manifest=$(jq -c "." plan-dist-manifest.json)" >> "$GITHUB_OUTPUT"
       - name: "Upload dist-manifest.json"
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v5
         with:
           name: artifacts-plan-dist-manifest
           path: plan-dist-manifest.json
@@ -135,7 +135,7 @@ jobs:
         run: ${{ matrix.install_dist.run }}
       # Get the dist-manifest
       - name: Fetch local artifacts
-        uses: actions/download-artifact@v5
+        uses: actions/download-artifact@v6
         with:
           pattern: artifacts-*
           path: target/distrib/
@@ -151,7 +151,7 @@ jobs:
           dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json
           echo "dist ran successfully"
       - name: Attest
-        uses: actions/attest-build-provenance@v2
+        uses: actions/attest-build-provenance@v3
         with:
           subject-path: "target/distrib/*${{ join(matrix.targets, ', ') }}*"
       - id: cargo-dist
@@ -168,7 +168,7 @@ jobs:
 
           cp dist-manifest.json "$BUILD_MANIFEST_NAME"
       - name: "Upload artifacts"
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v5
         with:
           name: artifacts-build-local-${{ join(matrix.targets, '_') }}
           path: |
@@ -190,7 +190,7 @@ jobs:
           persist-credentials: false
           submodules: recursive
       - name: Install cached dist
-        uses: actions/download-artifact@v5
+        uses: actions/download-artifact@v6
         with:
           name: cargo-dist-cache
           path: ~/.cargo/bin/
@@ -202,7 +202,7 @@ jobs:
         shell: bash
       # Get all the local artifacts for the global tasks to use (for e.g. checksums)
       - name: Fetch local artifacts
-        uses: actions/download-artifact@v5
+        uses: actions/download-artifact@v6
         with:
           pattern: artifacts-*
           path: target/distrib/
@@ -233,7 +233,7 @@ jobs:
             find . -name '*.cdx.xml' | tee -a "$GITHUB_OUTPUT"
             echo "EOF" >> "$GITHUB_OUTPUT"
       - name: "Upload artifacts"
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v5
         with:
           name: artifacts-build-global
           path: |
@@ -259,14 +259,14 @@ jobs:
           persist-credentials: false
           submodules: recursive
       - name: Install cached dist
-        uses: actions/download-artifact@v5
+        uses: actions/download-artifact@v6
         with:
           name: cargo-dist-cache
           path: ~/.cargo/bin/
       - run: chmod +x ~/.cargo/bin/dist
       # Fetch artifacts from scratch-storage
       - name: Fetch artifacts
-        uses: actions/download-artifact@v5
+        uses: actions/download-artifact@v6
         with:
           pattern: artifacts-*
           path: target/distrib/
@@ -279,14 +279,14 @@ jobs:
           cat dist-manifest.json
           echo "manifest=$(jq -c "." dist-manifest.json)" >> "$GITHUB_OUTPUT"
       - name: "Upload dist-manifest.json"
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v5
         with:
           # Overwrite the previous copy
           name: artifacts-dist-manifest
           path: dist-manifest.json
       # Create a GitHub Release while uploading all files to it
       - name: "Download GitHub Artifacts"
-        uses: actions/download-artifact@v5
+        uses: actions/download-artifact@v6
         with:
           pattern: artifacts-*
           path: artifacts
@@ -326,7 +326,7 @@ jobs:
           token: ${{ secrets.HOMEBREW_TAP_TOKEN }}
       # So we have access to the formula
       - name: Fetch homebrew formulae
-        uses: actions/download-artifact@v5
+        uses: actions/download-artifact@v6
         with:
           pattern: artifacts-*
           path: Formula/
diff --git a/Cargo.toml b/Cargo.toml
index 02a19b9..75eafad 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -20,16 +20,17 @@ path = "src/main.rs"
 
 [dependencies]
 clap = { version = "4.5.51", features = ["derive"] }
+entropy = "0.4.2"
 goblin = "0.10.3"
-pelite = "0.10"
+pelite = "0.10.0"
 serde = { version = "1.0.228", features = ["derive"] }
-serde_json = "1.0"
+serde_json = "1.0.145"
 thiserror = "2.0.17"
 
 [dev-dependencies]
 criterion = "0.7.0"
-insta = "1.43"
-tempfile = "3.23"
+insta = "1.43.2"
+tempfile = "3.23.0"
 
 # The profile that 'dist' will build with
 [profile.dist]
@@ -43,3 +44,7 @@ harness = false
 [[bench]]
 name = "pe"
 harness = false
+
+[[bench]]
+name = "ascii_extraction"
+harness = false
diff --git a/benches/ascii_extraction.rs b/benches/ascii_extraction.rs
new file mode 100644
index 0000000..31710c7
--- /dev/null
+++ b/benches/ascii_extraction.rs
@@ -0,0 +1,203 @@
+use criterion::{Criterion, criterion_group, criterion_main};
+use std::hint::black_box;
+use stringy::extraction::ascii::{AsciiExtractionConfig, extract_ascii_strings};
+use stringy::extraction::config::NoiseFilterConfig;
+use stringy::extraction::filters::{CompositeNoiseFilter, FilterContext};
+
+fn bench_basic_extraction(c: &mut Criterion) {
+    // Benchmark raw ASCII extraction over NUL-separated strings mixed with binary bytes.
+    let test_data =
+        b"Hello World\0Test String\0Another String\0Binary\x00\x01\x02Data\0More Strings\0"
+            .repeat(100);
+    let config = AsciiExtractionConfig::default();
+    c.bench_function("ascii_extraction_basic", |b| {
+        b.iter(|| {
+            // Keep the result observable so the optimizer cannot discard the extraction.
+            black_box(extract_ascii_strings(black_box(&test_data), black_box(&config)))
+        });
+    });
+}
+
+fn bench_filtered_extraction(c: &mut Criterion) {
+    let test_data =
+        b"Hello World\0Test String\0Another String\0Binary\x00\x01\x02Data\0More Strings\0"
+            .repeat(100);
+    let config = AsciiExtractionConfig::default();
+    let filter_config = NoiseFilterConfig::default();
+    let filter = CompositeNoiseFilter::new(&filter_config);
+    let context = FilterContext::default();
+    // Extraction plus scoring; scores pass through black_box so the work is not optimized away.
+    c.bench_function("ascii_extraction_with_filtering", |b| {
+        b.iter(|| {
+            let strings = extract_ascii_strings(black_box(&test_data), black_box(&config));
+            for s in &strings {
+                black_box(filter.calculate_confidence(black_box(&s.text), black_box(&context)));
+            }
+        });
+    });
+}
+
+fn bench_individual_filters(c: &mut Criterion) {
+    // Benchmarks each noise filter on its own against the same small set of sample strings.
+    use stringy::extraction::filters::{
+        CharDistributionFilter, ContextFilter, EntropyFilter, LengthFilter, LinguisticFilter,
+        NoiseFilter, RepetitionFilter,
+    };
+
+    let test_strings = vec![
+        "Hello, World!",
+        "AAAA",
+        "Error: file not found",
+        "!!!@@@###",
+        "C:\\Windows\\System32",
+    ];
+
+    let char_filter = CharDistributionFilter;
+    let entropy_filter = EntropyFilter::new(1.5, 7.5);
+    let linguistic_filter = LinguisticFilter::new(0.1, 0.9);
+    let length_filter = LengthFilter::new(200);
+    let repetition_filter = RepetitionFilter::new(0.7);
+    let context_filter = ContextFilter;
+    let ctx = FilterContext::default();
+
+    // Every score goes through black_box so the optimizer cannot drop the filter call.
+    c.bench_function("filter_char_distribution", |b| {
+        b.iter(|| {
+            for s in &test_strings {
+                black_box(char_filter.calculate_confidence(black_box(s), black_box(&ctx)));
+            }
+        });
+    });
+
+    c.bench_function("filter_entropy", |b| {
+        b.iter(|| {
+            for s in &test_strings {
+                black_box(entropy_filter.calculate_confidence(black_box(s), black_box(&ctx)));
+            }
+        });
+    });
+
+    c.bench_function("filter_linguistic", |b| {
+        b.iter(|| {
+            for s in &test_strings {
+                black_box(linguistic_filter.calculate_confidence(black_box(s), black_box(&ctx)));
+            }
+        });
+    });
+
+    c.bench_function("filter_length", |b| {
+        b.iter(|| {
+            for s in &test_strings {
+                black_box(length_filter.calculate_confidence(black_box(s), black_box(&ctx)));
+            }
+        });
+    });
+
+    c.bench_function("filter_repetition", |b| {
+        b.iter(|| {
+            for s in &test_strings {
+                black_box(repetition_filter.calculate_confidence(black_box(s), black_box(&ctx)));
+            }
+        });
+    });
+
+    c.bench_function("filter_context", |b| {
+        b.iter(|| {
+            for s in &test_strings {
+                black_box(context_filter.calculate_confidence(black_box(s), black_box(&ctx)));
+            }
+        });
+    });
+}
+
+fn bench_composite_filter(c: &mut Criterion) {
+    let test_strings = vec![
+        "Hello, World!",
+        "AAAA",
+        "Error: file not found",
+        "!!!@@@###",
+        "C:\\Windows\\System32",
+        "https://example.com",
+    ];
+
+    let filter_config = NoiseFilterConfig::default();
+    let filter = CompositeNoiseFilter::new(&filter_config);
+    let context = FilterContext::default();
+    // Scores each sample with the composite filter; black_box keeps the calls from being elided.
+    c.bench_function("composite_filter_all_enabled", |b| {
+        b.iter(|| {
+            for text in &test_strings {
+                black_box(filter.calculate_confidence(black_box(text), black_box(&context)));
+            }
+        });
+    });
+
+    // TODO: benchmark a configuration with some filters disabled. No such configuration is
+    // built here yet, so this currently measures the same default filter set as
+    // "composite_filter_all_enabled" above.
+    let partial = CompositeNoiseFilter::new(&filter_config);
+
+    c.bench_function("composite_filter_partial", |b| {
+        b.iter(|| {
+            for text in &test_strings {
+                black_box(partial.calculate_confidence(black_box(text), black_box(&context)));
+            }
+        });
+    });
+}
+
+fn bench_entropy_calculation(c: &mut Criterion) {
+    use entropy::shannon_entropy;
+
+    let test_strings = vec![
+        "Hello, World!",
+        "AAAA",
+        "Error: file not found",
+        "!!!@@@###",
+    ];
+    // Times the raw Shannon entropy call; black_box keeps each result from being elided.
+    c.bench_function("entropy_shannon_calculation", |b| {
+        b.iter(|| {
+            for text in &test_strings {
+                black_box(shannon_entropy(black_box(text.as_bytes())));
+            }
+        });
+    });
+}
+
+fn bench_large_binary(c: &mut Criterion) {
+    // Build 11,100 bytes: "Hello World\0" at every 100th step, a cycling byte value otherwise.
+    let mut large_data = Vec::new();
+    for i in 0..10000 {
+        if i % 100 == 0 {
+            large_data.extend_from_slice(b"Hello World\0");
+        } else {
+            large_data.push((i % 256) as u8);
+        }
+    }
+
+    let config = AsciiExtractionConfig::default();
+    let filter_config = NoiseFilterConfig::default();
+    let filter = CompositeNoiseFilter::new(&filter_config);
+    let context = FilterContext::default();
+    // Scores pass through black_box so the filtering work is not optimized away.
+    c.bench_function("large_binary_extraction", |b| {
+        b.iter(|| {
+            let strings = extract_ascii_strings(black_box(&large_data), black_box(&config));
+            for s in &strings {
+                black_box(filter.calculate_confidence(black_box(&s.text), black_box(&context)));
+            }
+        });
+    });
+}
+
+criterion_group!(
+    ascii_extraction_benches, // group name handed to criterion_main! below
+    bench_basic_extraction,
+    bench_filtered_extraction,
+    bench_individual_filters,
+    bench_composite_filter,
+    bench_entropy_calculation,
+    bench_large_binary
+);
+criterion_main!(ascii_extraction_benches); // provides main(); the bench target sets `harness = false`
diff --git a/dist-workspace.toml b/dist-workspace.toml
index aafdbfe..9f3c862 100644
--- a/dist-workspace.toml
+++ b/dist-workspace.toml
@@ -10,7 +10,13 @@ ci = "github"
 # The installers to generate for each app
 installers = ["shell", "powershell", "homebrew"]
 # Target platforms to build apps for (Rust target-triple syntax)
-targets = ["aarch64-apple-darwin", "aarch64-unknown-linux-gnu", "x86_64-unknown-linux-gnu", "x86_64-unknown-linux-musl", "x86_64-pc-windows-msvc"]
+targets = [
+    "aarch64-apple-darwin",
+    "aarch64-unknown-linux-gnu",
+    "x86_64-unknown-linux-gnu",
+    "x86_64-unknown-linux-musl",
+    "x86_64-pc-windows-msvc",
+]
 # A GitHub repo to push Homebrew formulas to
 tap = "EvilBit-Labs/homebrew-tap"
 # Customize the Homebrew formula name
@@ -49,4 +55,6 @@ install-success-msg = "Successfully installed Stringy! Ready to start looking at
 repository = "EvilBit-Labs/StringyMcStringFace"
 [dist.github-action-commits]
 "actions/checkout" = "v5"
-"actions/download-artifact" = "v5"
+"actions/download-artifact" = "v6"
+"actions/upload-artifact" = "v5"
+"actions/attest-build-provenance" = "v3"
diff --git a/docs/src/string-extraction.md b/docs/src/string-extraction.md
index 1764d35..b18f5a0 100644
--- a/docs/src/string-extraction.md
+++ b/docs/src/string-extraction.md
@@ -10,16 +10,50 @@ Binary Data → Section Analysis → Encoding Detection → String Scanning →
 
 ## Encoding Support
 
-### ASCII/UTF-8 Extraction
+### ASCII Extraction
 
-The most common encoding in most binaries.
+ASCII is the most common string encoding in binaries. ASCII extraction provides foundational string extraction with configurable length thresholds.
 
 #### Algorithm
 
-1. **Scan for printable sequences**: Characters in range 0x20-0x7E plus common whitespace
+1. **Scan for printable sequences**: Characters in range 0x20-0x7E (strict printable ASCII)
 2. **Length filtering**: Configurable minimum length (default: 4 characters)
 3. **Null termination**: Respect null terminators but don't require them
-4. **Context awareness**: Consider section type for validation
+4. **Section awareness**: Integrate with section metadata for context-aware filtering
+
+#### Basic Extraction
+
+```rust
+use stringy::extraction::ascii::{extract_ascii_strings, AsciiExtractionConfig};
+
+let data = b"Hello\0World\0Test123";
+let config = AsciiExtractionConfig::default();
+let strings = extract_ascii_strings(data, &config);
+
+for string in strings {
+    println!("Found: {} at offset {}", string.text, string.offset);
+}
+```
+
+#### Configuration
+
+```rust
+use stringy::extraction::ascii::AsciiExtractionConfig;
+
+// Default configuration (min_length: 4, no max_length)
+let config = AsciiExtractionConfig::default();
+
+// Custom minimum length
+let config = AsciiExtractionConfig::new(8);
+
+// Custom maximum length (minimum stays at the default of 4)
+let mut config = AsciiExtractionConfig::default();
+config.max_length = Some(256);
+```
+
+### UTF-8 Extraction
+
+UTF-8 extraction builds on ASCII extraction and handles multi-byte characters. See the main extraction module for UTF-8 support.
 
 #### Implementation Details
 
@@ -51,11 +85,129 @@ fn extract_ascii_strings(data: &[u8], min_len: usize) -> Vec<RawString> {
 }
 ```
 
-#### Noise Filtering
+## Noise Filtering
+
+Stringy implements a multi-layered heuristic filtering system to reduce false positives and identify noise in extracted strings. The filtering system uses a combination of entropy analysis, character distribution, linguistic patterns, length checks, repetition detection, and context-aware filtering.
+
+### Filter Architecture
+
+The noise filtering system consists of multiple independent filters that can be combined with configurable weights:
+
+1. **Character Distribution Filter**: Detects abnormal character frequency distributions
+2. **Entropy Filter**: Uses Shannon entropy to detect padding/repetition and random binary
+3. **Linguistic Pattern Filter**: Analyzes vowel-to-consonant ratios and common bigrams
+4. **Length Filter**: Penalizes excessively long strings and very short strings in low-weight sections
+5. **Repetition Filter**: Detects repeated character patterns and repeated substrings
+6. **Context-Aware Filter**: Adjusts confidence based on section context (higher for string-data sections, lower for code sections)
+
+### Character Distribution Analysis
+
+Detects strings with abnormal character distributions:
+
+- **Excessive punctuation** (>80%): Low confidence (0.2)
+- **Excessive repetition** (>90% same character): Very low confidence (0.1)
+- **Excessive non-alphanumeric** (>70%): Low confidence (0.3)
+- **Reasonable distribution**: High confidence (1.0)
+
+### Entropy-Based Filtering
+
+Uses Shannon entropy (bits per byte) to classify strings:
+
+- **Very low entropy** (\<1.5 bits/byte): Likely padding or repetition (confidence: 0.1)
+- **Very high entropy** (>7.5 bits/byte): Likely random binary (confidence: 0.2)
+- **Optimal range** (3.5-6.0 bits/byte): High confidence (1.0)
+- **Acceptable range** (2.0-7.0 bits/byte, outside the optimal range): Moderate confidence (0.4-0.7)
+
+### Linguistic Pattern Detection
+
+Analyzes text for word-like patterns:
+
+- **Vowel-to-consonant ratio**: Reasonable range 0.2-0.8 for English
+- **Common bigrams**: Detects common English patterns (th, he, in, er, an, re, on, at, en, nd)
+- **Handles non-English**: Gracefully handles non-English strings without over-penalizing
+
+### Length-Based Filtering
+
+Applies penalties based on string length:
+
+- **Excessively long** (>200 characters): Low confidence (0.3) - likely table data
+- **Very short in low-weight sections** (\<4 chars, weight \<0.5): Moderate confidence (0.5)
+- **Normal length** (4-100 characters): High confidence (1.0)
+
+### Repetition Detection
+
+Identifies repetitive patterns:
+
+- **Repeated characters** (e.g., "AAAA", "0000"): Very low confidence (0.1)
+- **Repeated substrings** (e.g., "abcabcabc"): Low confidence (0.2)
+- **Normal strings**: High confidence (1.0)
+
+### Context-Aware Filtering
+
+Boosts or reduces confidence based on section context:
+
+- **String data sections** (.rodata, .rdata, \_\_cstring): High confidence (0.9-1.0)
+- **Read-only data sections**: High confidence (0.9)
+- **Resource sections**: Maximum confidence (1.0) - known-good sources
+- **Code sections**: Lower confidence (0.3-0.5)
+- **Writable data sections**: Moderate confidence (0.6)
+
+### Configuration
+
+```rust
+use stringy::extraction::config::{NoiseFilterConfig, FilterWeights};
+
+// Default configuration
+let config = NoiseFilterConfig::default();
+
+// Customize thresholds
+let mut config = NoiseFilterConfig::default();
+config.entropy_min = 2.0;
+config.entropy_max = 7.0;
+config.max_length = 150;
+
+// Customize filter weights
+config.filter_weights = FilterWeights {
+    entropy_weight: 0.3,
+    char_distribution_weight: 0.25,
+    linguistic_weight: 0.2,
+    length_weight: 0.15,
+    repetition_weight: 0.05,
+    context_weight: 0.05,
+};
+```
+
+### Using Noise Filters
+
+```rust
+use stringy::extraction::config::NoiseFilterConfig;
+use stringy::extraction::filters::{CompositeNoiseFilter, FilterContext};
+use stringy::types::SectionType;
+
+let filter_config = NoiseFilterConfig::default();
+let filter = CompositeNoiseFilter::new(&filter_config);
+let context = FilterContext::default();
+
+let confidence = filter.calculate_confidence("Hello, World!", &context);
+if confidence >= 0.5 {
+    // String passed filtering threshold
+}
+```
+
+### Confidence Scoring
 
-- **Padding detection**: Skip sequences of repeated characters
-- **Table data**: Avoid extracting from obvious data tables
-- **Binary interleaving**: Skip strings with excessive binary data
+Each string is assigned a confidence score (0.0-1.0) indicating how likely it is to be legitimate:
+
+- **1.0**: Maximum confidence (strings from known-good sources like imports, exports, resources)
+- **0.7-0.9**: High confidence (likely legitimate strings)
+- **0.5-0.7**: Moderate confidence (may need review)
+- **0.0-0.5**: Low confidence (likely noise, filtered out by default)
+
+The confidence score is separate from the `score` field used for final ranking. Confidence specifically represents the noise filtering assessment.
+
+### Performance
+
+Noise filtering is designed to add minimal overhead (target: \<10% compared to extraction without filtering). Individual filters are optimized for performance, and the composite filter allows enabling/disabling specific filters to balance accuracy and speed.
 
 ### UTF-16 Extraction
 
@@ -251,16 +403,53 @@ fn deduplicate_strings(strings: Vec<RawString>) -> Vec<DeduplicatedString> {
 
 ## Configuration Options
 
-### Length Filtering
+### Extraction Configuration
 
 ```rust
+use stringy::extraction::config::ExtractionConfig;
+
 pub struct ExtractionConfig {
-    pub min_ascii_len: usize,  // Default: 4
-    pub min_utf16_len: usize,  // Default: 3
-    pub max_string_len: usize, // Default: 1024
+    pub min_ascii_length: usize,          // Default: 4
+    pub min_wide_length: usize,           // Default: 3 (for UTF-16)
+    pub enabled_encodings: Vec<Encoding>, // Default: ASCII, UTF-8
+    pub noise_filtering_enabled: bool,    // Default: true
+    pub min_confidence_threshold: f32,    // Default: 0.5
+}
+```
+
+### Noise Filter Configuration
+
+```rust
+use stringy::extraction::config::NoiseFilterConfig;
+
+pub struct NoiseFilterConfig {
+    pub entropy_min: f32,              // Default: 1.5
+    pub entropy_max: f32,              // Default: 7.5
+    pub max_length: usize,             // Default: 200
+    pub max_repetition_ratio: f32,     // Default: 0.7
+    pub min_vowel_ratio: f32,          // Default: 0.1
+    pub max_vowel_ratio: f32,          // Default: 0.9
+    pub filter_weights: FilterWeights, // Default: balanced weights
 }
 ```
 
+### Filter Weights
+
+```rust
+use stringy::extraction::config::FilterWeights;
+
+pub struct FilterWeights {
+    pub entropy_weight: f32,           // Default: 0.25
+    pub char_distribution_weight: f32, // Default: 0.20
+    pub linguistic_weight: f32,        // Default: 0.20
+    pub length_weight: f32,            // Default: 0.15
+    pub repetition_weight: f32,        // Default: 0.10
+    pub context_weight: f32,           // Default: 0.10
+}
+```
+
+All weights must sum to 1.0. The configuration validates this automatically.
+
 ### Encoding Selection
 
 ```rust
@@ -330,14 +519,73 @@ lazy_static! {
 
 ### Validation Heuristics
 
-- **Entropy checking**: Skip high-entropy strings likely to be binary data
-- **Language detection**: Prefer strings with common English patterns
-- **Context validation**: Consider surrounding bytes for legitimacy
+The noise filtering system implements comprehensive validation:
+
+- **Entropy checking**: Uses Shannon entropy to detect padding/repetition and random binary data
+- **Language detection**: Analyzes vowel-to-consonant ratios and common bigrams
+- **Context validation**: Considers section type, weight, and permissions
+- **Character distribution**: Detects abnormal frequency distributions
+- **Repetition detection**: Identifies repeated patterns and padding
 
 ### False Positive Reduction
 
-- **Padding detection**: Skip repeated character sequences
-- **Table data**: Avoid structured binary data
-- **Alignment checking**: Consider memory alignment patterns
+The multi-layered filtering system targets common sources of false positives:
+
+- **Padding detection**: Identifies repeated character sequences (e.g., "AAAA", "\\x00\\x00\\x00\\x00")
+- **Table data**: Filters excessively long strings likely to be structured data
+- **Binary noise**: High-entropy strings are flagged as likely random binary
+- **Context awareness**: Strings in code sections receive lower confidence scores
+
+### Performance Characteristics
+
+Noise filtering is designed for minimal overhead:
+
+- **Target overhead**: \<10% compared to extraction without filtering
+- **Optimized filters**: Each filter is independently optimized
+- **Configurable**: Can enable/disable individual filters to balance accuracy and speed
+- **Scalable**: Handles large binaries efficiently
+
+### Examples
+
+#### Basic Extraction with Filtering
+
+```rust
+use stringy::extraction::ascii::{extract_ascii_strings, AsciiExtractionConfig};
+use stringy::extraction::config::NoiseFilterConfig;
+use stringy::extraction::filters::{CompositeNoiseFilter, FilterContext};
+
+let data = b"Hello World\0AAAA\0Test123";
+let config = AsciiExtractionConfig::default();
+let strings = extract_ascii_strings(data, &config);
+
+let filter_config = NoiseFilterConfig::default();
+let filter = CompositeNoiseFilter::new(&filter_config);
+let context = FilterContext::default();
+
+let filtered: Vec<_> = strings
+    .into_iter()
+    .filter(|s| filter.calculate_confidence(&s.text, &context) >= 0.5)
+    .collect();
+```
+
+#### Custom Filter Configuration
+
+```rust
+use stringy::extraction::config::{NoiseFilterConfig, FilterWeights};
+
+let mut config = NoiseFilterConfig::default();
+config.entropy_min = 2.0;
+config.entropy_max = 7.0;
+config.max_length = 150;
+
+config.filter_weights = FilterWeights {
+    entropy_weight: 0.4,
+    char_distribution_weight: 0.3,
+    linguistic_weight: 0.15,
+    length_weight: 0.1,
+    repetition_weight: 0.03,
+    context_weight: 0.02,
+};
+```
 
-This comprehensive extraction system ensures high-quality string extraction while maintaining performance and minimizing false positives.
+This comprehensive extraction system ensures high-quality string extraction while maintaining performance and minimizing false positives through multi-layered noise filtering.
diff --git a/src/extraction/ascii.rs b/src/extraction/ascii.rs
new file mode 100644
index 0000000..9340229
--- /dev/null
+++ b/src/extraction/ascii.rs
@@ -0,0 +1,820 @@
+//! ASCII String Extraction Module
+//!
+//! This module provides foundational ASCII string extraction for StringyMcStringFace.
+//! It implements byte-level scanning for contiguous printable ASCII sequences and serves
+//! as the reference implementation for future UTF-8, UTF-16LE, and UTF-16BE extractors.
+//!
+//! # Examples
+//!
+//! ```rust
+//! use stringy::extraction::ascii::{extract_ascii_strings, extract_from_section, AsciiExtractionConfig};
+//! use stringy::types::{SectionInfo, SectionType};
+//!
+//! // Basic extraction from raw data
+//! let data = b"Hello\0World\0Test123";
+//! let config = AsciiExtractionConfig::default();
+//! let strings = extract_ascii_strings(data, &config);
+//!
+//! // Section-aware extraction
+//! let section = SectionInfo {
+//!     name: ".rodata".to_string(),
+//!     offset: 0,
+//!     size: 20,
+//!     rva: Some(0x1000),
+//!     section_type: SectionType::StringData,
+//!     is_executable: false,
+//!     is_writable: false,
+//!     weight: 1.0,
+//! };
+//! let strings = extract_from_section(&section, data, &config, None, false, 0.5);
+//! ```
+
+use crate::extraction::config::NoiseFilterConfig;
+use crate::extraction::filters::{CompositeNoiseFilter, FilterContext};
+use crate::types::{Encoding, FoundString, SectionInfo, StringSource};
+
+/// Configuration for ASCII string extraction
+///
+/// Bounds the byte length of the strings that `extract_ascii_strings` reports:
+/// runs shorter than `min_length` or longer than `max_length` are dropped.
+/// Both fields are public, so a config can also be built with struct-update syntax.
+///
+/// # Default Values
+///
+/// - `min_length`: 4 (standard minimum to reduce noise)
+/// - `max_length`: None (no upper limit by default)
+///
+/// # Examples
+///
+/// ```rust
+/// use stringy::extraction::ascii::AsciiExtractionConfig;
+///
+/// // Use default configuration
+/// let config = AsciiExtractionConfig::default();
+///
+/// // Custom minimum length
+/// let config = AsciiExtractionConfig::new(8);
+///
+/// // Default minimum length with a custom maximum length
+/// let mut config = AsciiExtractionConfig::default();
+/// config.max_length = Some(256);
+/// ```
+#[derive(Debug, Clone)]
+pub struct AsciiExtractionConfig {
+    /// Minimum string length in bytes (default: 4)
+    pub min_length: usize,
+    /// Maximum string length in bytes (default: None, no limit)
+    pub max_length: Option<usize>,
+}
+
+impl Default for AsciiExtractionConfig {
+    /// Defaults: `min_length` of 4 bytes and no `max_length` cap.
+    fn default() -> Self {
+        // `new` leaves `max_length` at `None`, so only the minimum is named here.
+        const DEFAULT_MIN_LENGTH: usize = 4;
+        Self::new(DEFAULT_MIN_LENGTH)
+    }
+}
+
+impl AsciiExtractionConfig {
+    /// Build a configuration that only overrides the minimum length
+    ///
+    /// # Arguments
+    ///
+    /// * `min_length` - Shortest run, in bytes, that is still reported
+    ///
+    /// # Returns
+    ///
+    /// A config carrying `min_length` as given and `max_length` set to `None`
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use stringy::extraction::ascii::AsciiExtractionConfig;
+    ///
+    /// let config = AsciiExtractionConfig::new(8);
+    /// assert_eq!(config.min_length, 8);
+    /// assert_eq!(config.max_length, None);
+    /// ```
+    pub fn new(min_length: usize) -> Self {
+        AsciiExtractionConfig {
+            max_length: None,
+            min_length,
+        }
+    }
+}
+
+/// Report whether `byte` lies in the printable ASCII range
+///
+/// The accepted range is 0x20 (space) through 0x7E (tilde), inclusive; every
+/// other value, including DEL (0x7F) and bytes >= 0x80, is rejected.
+///
+/// **Note on printable character definitions**: the definition here is strict.
+/// Tab (0x09), newline (0x0A) and carriage return (0x0D) are control characters
+/// and therefore do NOT count as printable, so they terminate a string run.
+/// This differs from `is_printable_text_byte` in `extraction::mod`, which is
+/// documented as also accepting those three bytes to handle formatted text;
+/// keeping this predicate strict makes ASCII-only extraction predictable.
+///
+/// # Arguments
+///
+/// * `byte` - Byte to classify
+///
+/// # Returns
+///
+/// `true` when `0x20 <= byte <= 0x7E`, `false` otherwise
+///
+/// # Example
+///
+/// ```rust
+/// use stringy::extraction::ascii::is_printable_ascii;
+///
+/// assert!(is_printable_ascii(b' '));
+/// assert!(is_printable_ascii(b'A'));
+/// assert!(is_printable_ascii(b'z'));
+/// assert!(is_printable_ascii(b'0'));
+/// assert!(is_printable_ascii(b'~'));
+/// assert!(!is_printable_ascii(0x00));
+/// assert!(!is_printable_ascii(0x1F));
+/// assert!(!is_printable_ascii(0x7F));
+/// ```
+#[inline]
+pub fn is_printable_ascii(byte: u8) -> bool {
+    matches!(byte, b' '..=b'~')
+}
+
+/// Extract ASCII strings from a byte slice
+///
+/// Scans `data` for maximal runs of printable ASCII bytes (as defined by
+/// [`is_printable_ascii`]) and reports every run whose length satisfies the
+/// bounds in `config` as a [`FoundString`].
+///
+/// # Algorithm
+///
+/// 1. Walk the slice, remembering the index at which the current printable run started
+/// 2. A non-printable byte, or the end of the buffer, closes the current run
+/// 3. A closed run is dropped when it is shorter than `config.min_length` or,
+///    if `config.max_length` is set, longer than that maximum
+/// 4. Every surviving run is copied out of `data` once and pushed as a `FoundString`
+///
+/// Runs are tracked by index instead of being accumulated byte by byte, so runs
+/// that fail the length bounds are never copied, and a single code path handles
+/// both mid-buffer and end-of-buffer runs.
+///
+/// # Arguments
+///
+/// * `data` - Byte slice to scan for ASCII strings
+/// * `config` - Extraction configuration
+///
+/// # Returns
+///
+/// Vector of FoundString entries, in order of appearance, with the following metadata:
+/// - `text`: the run's bytes as a `String`
+/// - `encoding`: `Encoding::Ascii`
+/// - `offset`: Start position in the data slice
+/// - `length`: Byte count (saturates at `u32::MAX` instead of wrapping)
+/// - `source`: `StringSource::SectionData`
+/// - `tags`: Empty vector
+/// - `score`: 0
+/// - `confidence`: 1.0
+/// - `section`: None
+/// - `rva`: None
+///
+/// # Edge Cases
+///
+/// - Empty input data returns empty vector
+/// - Data smaller than minimum length returns empty vector
+/// - String at buffer start (offset 0)
+/// - String at buffer end (flushed by the one-past-the-end iteration)
+/// - A run of exactly `max_length` bytes is kept; only longer runs are dropped
+///
+/// # Example
+///
+/// ```rust
+/// use stringy::extraction::ascii::{extract_ascii_strings, AsciiExtractionConfig};
+///
+/// let data = b"Hello\0World\0Test123";
+/// let config = AsciiExtractionConfig::default();
+/// let strings = extract_ascii_strings(data, &config);
+///
+/// assert_eq!(strings.len(), 3);
+/// assert_eq!(strings[0].text, "Hello");
+/// assert_eq!(strings[0].offset, 0);
+/// assert_eq!(strings[1].text, "World");
+/// assert_eq!(strings[1].offset, 6);
+/// ```
+pub fn extract_ascii_strings(data: &[u8], config: &AsciiExtractionConfig) -> Vec<FoundString> {
+    let mut strings = Vec::new();
+    let mut run_start: Option<usize> = None;
+
+    // Iterate one index past the last byte: `data.get(data.len())` is `None`, which is
+    // treated like a non-printable byte and flushes a run that touches the buffer end.
+    for end in 0..=data.len() {
+        if data.get(end).is_some_and(|&byte| is_printable_ascii(byte)) {
+            if run_start.is_none() {
+                run_start = Some(end);
+            }
+            continue;
+        }
+
+        // `take` both yields the pending run (if any) and resets the tracker.
+        let Some(start) = run_start.take() else {
+            continue;
+        };
+        let run = &data[start..end];
+
+        let too_short = run.len() < config.min_length;
+        let too_long = config.max_length.is_some_and(|max_len| run.len() > max_len);
+        if too_short || too_long {
+            continue;
+        }
+
+        strings.push(FoundString {
+            // Every byte is in 0x20..=0x7E, so the byte-to-char mapping is exact and
+            // cannot fail, unlike a fallible UTF-8 conversion.
+            text: run.iter().copied().map(char::from).collect(),
+            encoding: Encoding::Ascii,
+            offset: start as u64,
+            rva: None,
+            section: None,
+            // Saturate rather than silently wrap for runs longer than u32::MAX bytes.
+            length: u32::try_from(run.len()).unwrap_or(u32::MAX),
+            tags: Vec::new(),
+            score: 0,
+            source: StringSource::SectionData,
+            confidence: 1.0,
+        });
+    }
+
+    strings
+}
+
+/// Extract ASCII strings from a specific section with proper metadata population
+///
+/// Runs [`extract_ascii_strings`] over the part of `data` covered by `section`,
+/// optionally scores and filters the candidates with the noise filters, and
+/// rewrites each surviving string's offset, section name and RVA so that they
+/// refer to the whole input rather than to the section slice.
+///
+/// # Implementation
+///
+/// 1. Convert section.offset / section.size to `usize` and clamp the slice end to data.len()
+/// 2. Call `extract_ascii_strings` on the section data slice
+/// 3. If a noise filter is active, compute each candidate's confidence and drop
+///    candidates below `min_confidence_threshold`; otherwise set confidence to 1.0
+/// 4. Adjust offsets (add section.offset to the section-relative offset)
+/// 5. Populate section field with section.name.clone()
+/// 6. Populate rva field with section.rva + relative_offset if section.rva is Some
+///    and the sum does not overflow
+/// 7. Return the adjusted vector of FoundStrings
+///
+/// # Arguments
+///
+/// * `section` - Section metadata
+/// * `data` - Raw binary data
+/// * `config` - Extraction configuration
+/// * `noise_filter_config` - Optional noise filter configuration (if None, filtering is skipped)
+/// * `noise_filtering_enabled` - Whether to apply noise filtering; a filter is only
+///   built when this is `true` and `noise_filter_config` is `Some`
+/// * `min_confidence_threshold` - Strings scoring strictly below this value are
+///   dropped (only consulted when a filter is active)
+///
+/// # Returns
+///
+/// Vector of FoundString entries with complete metadata including:
+/// - Adjusted absolute offsets (section.offset + relative_offset)
+/// - Section name populated
+/// - RVA calculated if section.rva is available
+/// - Confidence scores computed from noise filters (1.0 when no filter is active)
+///
+/// # Edge Cases
+///
+/// - Section boundaries: ensures slice doesn't exceed data.len()
+/// - Section offset + size overflow: the slice end is clamped to data.len()
+/// - section.offset that does not fit in `usize`: returns empty vector
+/// - section.size that does not fit in `usize`: treated as "to the end of data"
+/// - section.rva + relative offset overflowing `u64`: `rva` is left as `None`
+/// - Empty sections return empty vector
+/// - Sections beyond data bounds return empty vector
+///
+/// # Example
+///
+/// ```rust
+/// use stringy::extraction::ascii::{extract_from_section, AsciiExtractionConfig};
+/// use stringy::extraction::config::NoiseFilterConfig;
+/// use stringy::types::{SectionInfo, SectionType};
+///
+/// let section = SectionInfo {
+///     name: ".rodata".to_string(),
+///     offset: 10,
+///     size: 20,
+///     rva: Some(0x1000),
+///     section_type: SectionType::StringData,
+///     is_executable: false,
+///     is_writable: false,
+///     weight: 1.0,
+/// };
+///
+/// let data = b"prefix\0Hello World\0suffix";
+/// let config = AsciiExtractionConfig::default();
+/// let noise_config = Some(NoiseFilterConfig::default());
+/// let strings = extract_from_section(&section, data, &config, noise_config.as_ref(), true, 0.5);
+///
+/// // Strings will have adjusted offsets and section metadata
+/// for string in strings {
+///     assert_eq!(string.section, Some(".rodata".to_string()));
+///     assert!(string.offset >= 10);
+/// }
+/// ```
+pub fn extract_from_section(
+    section: &SectionInfo,
+    data: &[u8],
+    config: &AsciiExtractionConfig,
+    noise_filter_config: Option<&NoiseFilterConfig>,
+    noise_filtering_enabled: bool,
+    min_confidence_threshold: f32,
+) -> Vec<FoundString> {
+    // An offset that does not fit in usize cannot point inside `data`; converting it
+    // with `as` would silently truncate it to some unrelated, possibly in-bounds, position.
+    let Ok(section_offset) = usize::try_from(section.offset) else {
+        return Vec::new();
+    };
+    if section_offset >= data.len() {
+        return Vec::new();
+    }
+
+    // An unrepresentable or overflowing size simply means "up to the end of the data".
+    let section_size = usize::try_from(section.size).unwrap_or(usize::MAX);
+    let end_offset = section_offset
+        .saturating_add(section_size)
+        .min(data.len());
+    let section_data = &data[section_offset..end_offset];
+
+    // Candidate offsets are relative to `section_data` at this point.
+    let candidates = extract_ascii_strings(section_data, config);
+
+    // Build filter context from section
+    let filter_context = FilterContext::from_section(section);
+
+    // A filter exists only when filtering is enabled AND a config was supplied.
+    let filter = if noise_filtering_enabled {
+        noise_filter_config.map(CompositeNoiseFilter::new)
+    } else {
+        None
+    };
+
+    let mut filtered_strings = Vec::with_capacity(candidates.len());
+    for mut string in candidates {
+        match filter.as_ref() {
+            Some(noise_filter) => {
+                string.confidence =
+                    noise_filter.calculate_confidence(&string.text, &filter_context);
+                if string.confidence < min_confidence_threshold {
+                    continue;
+                }
+            }
+            // No active filter: keep the default confidence of 1.0
+            None => string.confidence = 1.0,
+        }
+
+        // relative_offset < section_data.len(), so this sum stays below data.len()
+        // and cannot overflow.
+        let relative_offset = string.offset;
+        string.offset = section.offset + relative_offset;
+
+        string.section = Some(section.name.clone());
+
+        // The base RVA comes from the binary's headers and may be arbitrary in a
+        // malformed file; on overflow the RVA is left unset instead of panicking
+        // (debug builds) or wrapping (release builds).
+        if let Some(base_rva) = section.rva {
+            string.rva = base_rva.checked_add(relative_offset);
+        }
+
+        filtered_strings.push(string);
+    }
+
+    filtered_strings
+}
+
+#[cfg(test)]
+mod tests {
+    //! Unit tests for ASCII extraction: the printable-byte predicate, length
+    //! filtering in `extract_ascii_strings`, and offset / RVA / section metadata
+    //! handling in `extract_from_section`.
+
+    use super::*;
+    use crate::types::{SectionInfo, SectionType};
+
+    // Helper to create test section
+    fn create_test_section(name: &str, offset: u64, size: u64, rva: Option<u64>) -> SectionInfo {
+        SectionInfo {
+            name: name.to_string(),
+            offset,
+            size,
+            rva,
+            section_type: SectionType::StringData,
+            is_executable: false,
+            is_writable: false,
+            weight: 1.0,
+        }
+    }
+
+    #[test]
+    fn test_is_printable_ascii() {
+        // Printable ASCII range (0x20-0x7E)
+        assert!(is_printable_ascii(0x20)); // space
+        assert!(is_printable_ascii(0x21)); // !
+        assert!(is_printable_ascii(0x41)); // A
+        assert!(is_printable_ascii(0x5A)); // Z
+        assert!(is_printable_ascii(0x61)); // a
+        assert!(is_printable_ascii(0x7A)); // z
+        assert!(is_printable_ascii(0x30)); // 0
+        assert!(is_printable_ascii(0x39)); // 9
+        assert!(is_printable_ascii(0x7E)); // ~
+
+        // Non-printable
+        assert!(!is_printable_ascii(0x00));
+        assert!(!is_printable_ascii(0x1F));
+        assert!(!is_printable_ascii(0x7F));
+        assert!(!is_printable_ascii(0xFF));
+    }
+
+    #[test]
+    fn test_extract_ascii_strings_basic() {
+        // Basic extraction with default minimum length (4)
+        let data = b"Hello\0World\0Test";
+        let config = AsciiExtractionConfig::default();
+        let strings = extract_ascii_strings(data, &config);
+
+        assert_eq!(strings.len(), 3);
+        assert_eq!(strings[0].text, "Hello");
+        assert_eq!(strings[0].offset, 0);
+        assert_eq!(strings[0].encoding, Encoding::Ascii);
+        assert_eq!(strings[0].source, StringSource::SectionData);
+        assert_eq!(strings[1].text, "World");
+        assert_eq!(strings[1].offset, 6);
+        assert_eq!(strings[2].text, "Test");
+        assert_eq!(strings[2].offset, 12);
+    }
+
+    #[test]
+    fn test_extract_ascii_strings_custom_min_length() {
+        // Custom minimum length filtering
+        let data = b"Hi\0Test\0AB\0LongString";
+        let config = AsciiExtractionConfig::new(3);
+        let strings = extract_ascii_strings(data, &config);
+
+        assert_eq!(strings.len(), 2);
+        assert_eq!(strings[0].text, "Test");
+        assert_eq!(strings[1].text, "LongString");
+        // "Hi" and "AB" should be filtered out (length < 3)
+    }
+
+    #[test]
+    fn test_extract_ascii_strings_min_length_5() {
+        let data = b"Test\0Hello\0World";
+        let config = AsciiExtractionConfig::new(5);
+        let strings = extract_ascii_strings(data, &config);
+
+        assert_eq!(strings.len(), 2);
+        assert_eq!(strings[0].text, "Hello");
+        assert_eq!(strings[1].text, "World");
+        // "Test" should be filtered out (length < 5)
+    }
+
+    #[test]
+    fn test_extract_ascii_strings_min_length_10() {
+        let data = b"Short\0VeryLongStringHere";
+        let config = AsciiExtractionConfig::new(10);
+        let strings = extract_ascii_strings(data, &config);
+
+        assert_eq!(strings.len(), 1);
+        assert_eq!(strings[0].text, "VeryLongStringHere");
+    }
+
+    #[test]
+    fn test_extract_ascii_strings_empty_input() {
+        // Empty input edge case
+        let data = b"";
+        let config = AsciiExtractionConfig::default();
+        let strings = extract_ascii_strings(data, &config);
+
+        assert!(strings.is_empty());
+    }
+
+    #[test]
+    fn test_extract_ascii_strings_no_strings_found() {
+        // No strings found (all binary data)
+        let data = &[0x00, 0xFF, 0x01, 0x02, 0x03];
+        let config = AsciiExtractionConfig::default();
+        let strings = extract_ascii_strings(data, &config);
+
+        assert!(strings.is_empty());
+    }
+
+    #[test]
+    fn test_extract_ascii_strings_string_at_start() {
+        // String at buffer start
+        let data = b"Start\0Middle\0End";
+        let config = AsciiExtractionConfig::default();
+        let strings = extract_ascii_strings(data, &config);
+
+        // "End" is only 3 characters, below min_length=4, so filtered out
+        assert_eq!(strings.len(), 2);
+        assert_eq!(strings[0].text, "Start");
+        assert_eq!(strings[0].offset, 0);
+        assert_eq!(strings[1].text, "Middle");
+    }
+
+    #[test]
+    fn test_extract_ascii_strings_string_at_end() {
+        // String at buffer end
+        let data = b"Start\0Middle\0EndTest";
+        let config = AsciiExtractionConfig::default();
+        let strings = extract_ascii_strings(data, &config);
+
+        assert_eq!(strings.len(), 3);
+        assert_eq!(strings[2].text, "EndTest");
+        assert_eq!(strings[2].offset, 13);
+    }
+
+    #[test]
+    fn test_extract_ascii_strings_single_char_below_minimum() {
+        // Single character below minimum
+        let data = b"A\0Test\0B\0C";
+        let config = AsciiExtractionConfig::default();
+        let strings = extract_ascii_strings(data, &config);
+
+        assert_eq!(strings.len(), 1);
+        assert_eq!(strings[0].text, "Test");
+        // Single characters should be filtered out
+    }
+
+    #[test]
+    fn test_extract_ascii_strings_exact_minimum_length() {
+        // Exact minimum length string
+        let data = b"Test\0Hello";
+        let config = AsciiExtractionConfig::default(); // min_length = 4
+        let strings = extract_ascii_strings(data, &config);
+
+        assert_eq!(strings.len(), 2);
+        assert_eq!(strings[0].text, "Test");
+        assert_eq!(strings[0].length, 4);
+        assert_eq!(strings[1].text, "Hello");
+    }
+
+    #[test]
+    fn test_extract_ascii_strings_offset_calculation() {
+        // Offset calculation correctness
+        let data = b"prefix\0Hello\0World\0suffix";
+        let config = AsciiExtractionConfig::default();
+        let strings = extract_ascii_strings(data, &config);
+
+        // All strings are >= 4 characters, so all should be extracted
+        assert_eq!(strings.len(), 4);
+        assert_eq!(strings[0].text, "prefix");
+        assert_eq!(strings[0].offset, 0);
+        assert_eq!(strings[1].text, "Hello");
+        assert_eq!(strings[1].offset, 7); // "prefix\0" = 7 bytes
+        assert_eq!(strings[2].text, "World");
+        assert_eq!(strings[2].offset, 13); // "prefix\0Hello\0" = 13 bytes
+        assert_eq!(strings[3].text, "suffix");
+        assert_eq!(strings[3].offset, 19); // "prefix\0Hello\0World\0" = 19 bytes
+    }
+
+    #[test]
+    fn test_extract_ascii_strings_multiple_strings_sequence() {
+        // Multiple strings in sequence
+        let data = b"First\0Second\0Third\0Fourth";
+        let config = AsciiExtractionConfig::default();
+        let strings = extract_ascii_strings(data, &config);
+
+        assert_eq!(strings.len(), 4);
+        assert_eq!(strings[0].text, "First");
+        assert_eq!(strings[1].text, "Second");
+        assert_eq!(strings[2].text, "Third");
+        assert_eq!(strings[3].text, "Fourth");
+    }
+
+    #[test]
+    fn test_extract_ascii_strings_separated_by_single_byte() {
+        // Strings separated by single non-printable byte
+        let data = b"Hello\x00World\x01Test";
+        let config = AsciiExtractionConfig::default();
+        let strings = extract_ascii_strings(data, &config);
+
+        assert_eq!(strings.len(), 3);
+        assert_eq!(strings[0].text, "Hello");
+        assert_eq!(strings[1].text, "World");
+        assert_eq!(strings[2].text, "Test");
+    }
+
+    #[test]
+    fn test_extract_ascii_strings_max_length_filtering() {
+        // Max length filtering if configured (over-long string ends at the buffer end)
+        let data = b"Short\0VeryLongStringHere";
+        let config = AsciiExtractionConfig {
+            max_length: Some(10),
+            ..Default::default()
+        };
+        let strings = extract_ascii_strings(data, &config);
+
+        assert_eq!(strings.len(), 1);
+        assert_eq!(strings[0].text, "Short");
+        // "VeryLongStringHere" should be filtered out (length > 10)
+    }
+
+    #[test]
+    fn test_extract_ascii_strings_exact_maximum_length() {
+        // A string whose length equals max_length is kept; only longer ones are dropped
+        let data = b"Short\0Toolong";
+        let config = AsciiExtractionConfig {
+            max_length: Some(5),
+            ..Default::default()
+        };
+        let strings = extract_ascii_strings(data, &config);
+
+        assert_eq!(strings.len(), 1);
+        assert_eq!(strings[0].text, "Short");
+        assert_eq!(strings[0].length, 5);
+    }
+
+    #[test]
+    fn test_extract_ascii_strings_very_long_string() {
+        // Very long strings (over-long string is terminated by a non-printable byte)
+        let long_string = "A".repeat(1000);
+        let data = format!("{}\0Short", long_string).into_bytes();
+        let config = AsciiExtractionConfig {
+            max_length: Some(100),
+            ..Default::default()
+        };
+        let strings = extract_ascii_strings(&data, &config);
+
+        assert_eq!(strings.len(), 1);
+        assert_eq!(strings[0].text, "Short");
+        // Very long string should be filtered out
+    }
+
+    #[test]
+    fn test_extract_from_section_basic() {
+        // Basic section extraction
+        let section = create_test_section(".rodata", 0, 20, Some(0x1000));
+        let data = b"Hello World\0Test";
+        let config = AsciiExtractionConfig::default();
+        let strings = extract_from_section(&section, data, &config, None, false, 0.5);
+
+        assert_eq!(strings.len(), 2);
+        assert_eq!(strings[0].text, "Hello World");
+        assert_eq!(strings[0].offset, 0);
+        assert_eq!(strings[0].rva, Some(0x1000));
+        assert_eq!(strings[0].section, Some(".rodata".to_string()));
+        assert_eq!(strings[1].text, "Test");
+        assert_eq!(strings[1].offset, 12);
+        assert_eq!(strings[1].rva, Some(0x100C));
+    }
+
+    #[test]
+    fn test_extract_from_section_offset_adjustment() {
+        // Section metadata population (verify section name and RVA)
+        // data = b"prefix\0Hello World\0suffix"
+        //        "prefix\0" = 7 bytes, so "Hello World" starts at offset 7
+        // Section should start at 7 to include "Hello World"
+        let section = create_test_section(".data", 7, 12, Some(0x2000));
+        let data = b"prefix\0Hello World\0suffix";
+        let config = AsciiExtractionConfig::default();
+        let strings = extract_from_section(&section, data, &config, None, false, 0.5);
+
+        assert_eq!(strings.len(), 1);
+        assert_eq!(strings[0].text, "Hello World");
+        // Section starts at 7, "Hello World" is at relative offset 0 within section
+        // Absolute offset = section.offset (7) + relative_offset (0) = 7
+        assert_eq!(strings[0].offset, 7);
+        assert_eq!(strings[0].rva, Some(0x2000));
+        assert_eq!(strings[0].section, Some(".data".to_string()));
+    }
+
+    #[test]
+    fn test_extract_from_section_rva_calculation() {
+        // RVA calculation for a section that starts part-way into the data
+        // Layout: "pre\0" = 4 bytes, "Hello" at 4..9, "\0" at 9, "suffix" at 10..16
+        let section = create_test_section(".text", 4, 12, Some(0x1000));
+        let data = b"pre\0Hello\0suffix";
+        let config = AsciiExtractionConfig::default();
+        let strings = extract_from_section(&section, data, &config, None, false, 0.5);
+
+        // Section data is data[4..16] = "Hello\0suffix"
+        assert_eq!(strings.len(), 2);
+
+        // "Hello" is at relative offset 0: absolute offset 4 + 0, RVA 0x1000 + 0
+        assert_eq!(strings[0].text, "Hello");
+        assert_eq!(strings[0].offset, 4);
+        assert_eq!(strings[0].rva, Some(0x1000));
+
+        // "suffix" is at relative offset 6: absolute offset 4 + 6, RVA 0x1000 + 6
+        // (the RVA is derived from the section-relative offset, not the absolute one)
+        assert_eq!(strings[1].text, "suffix");
+        assert_eq!(strings[1].offset, 10);
+        assert_eq!(strings[1].rva, Some(0x1006));
+    }
+
+    #[test]
+    fn test_extract_from_section_no_rva() {
+        // Section without RVA
+        let section = create_test_section(".data", 0, 20, None);
+        let data = b"Hello World\0Test";
+        let config = AsciiExtractionConfig::default();
+        let strings = extract_from_section(&section, data, &config, None, false, 0.5);
+
+        assert_eq!(strings.len(), 2);
+        assert_eq!(strings[0].rva, None);
+        assert_eq!(strings[1].rva, None);
+    }
+
+    #[test]
+    fn test_extract_from_section_section_name() {
+        // Verify section name is populated
+        let section = create_test_section(".custom", 0, 20, Some(0x3000));
+        let data = b"Test String\0Another";
+        let config = AsciiExtractionConfig::default();
+        let strings = extract_from_section(&section, data, &config, None, false, 0.5);
+
+        // Guard against the loop below passing vacuously on an empty result
+        assert_eq!(strings.len(), 2);
+        for string in &strings {
+            assert_eq!(string.section, Some(".custom".to_string()));
+        }
+    }
+
+    #[test]
+    fn test_extract_from_section_enabled_without_filter_config() {
+        // Filtering requested but no NoiseFilterConfig supplied: nothing is filtered
+        // and every string keeps the default confidence
+        let section = create_test_section(".rodata", 0, 20, None);
+        let data = b"Hello World\0Test";
+        let config = AsciiExtractionConfig::default();
+        let strings = extract_from_section(&section, data, &config, None, true, 0.5);
+
+        assert_eq!(strings.len(), 2);
+        assert!(strings.iter().all(|s| s.confidence == 1.0));
+    }
+
+    #[test]
+    fn test_extract_from_section_bounds_checking() {
+        // Section boundaries (ensure slice doesn't exceed data.len())
+        let section = create_test_section(".data", 0, 1000, None);
+        let data = b"Short data";
+        let config = AsciiExtractionConfig::default();
+        let strings = extract_from_section(&section, data, &config, None, false, 0.5);
+
+        // Should extract exactly the available data, not panic
+        assert_eq!(strings.len(), 1);
+        assert_eq!(strings[0].text, "Short data");
+    }
+
+    #[test]
+    fn test_extract_from_section_out_of_bounds() {
+        // Section offset lies beyond the end of the data
+        let section = create_test_section(".data", 1000, 100, None);
+        let data = b"Short data";
+        let config = AsciiExtractionConfig::default();
+        let strings = extract_from_section(&section, data, &config, None, false, 0.5);
+
+        // Should return empty vector, not panic
+        assert!(strings.is_empty());
+    }
+
+    #[test]
+    fn test_extract_from_section_size_overflow() {
+        // Section offset + size overflows; the slice end must clamp to data.len()
+        let section = create_test_section(".data", 4, u64::MAX, None);
+        let data = b"pre\0Hello";
+        let config = AsciiExtractionConfig::default();
+        let strings = extract_from_section(&section, data, &config, None, false, 0.5);
+
+        assert_eq!(strings.len(), 1);
+        assert_eq!(strings[0].text, "Hello");
+        assert_eq!(strings[0].offset, 4);
+    }
+
+    #[test]
+    fn test_extract_from_section_empty_section() {
+        // Empty section
+        let section = create_test_section(".empty", 0, 0, None);
+        let data = b"Some data";
+        let config = AsciiExtractionConfig::default();
+        let strings = extract_from_section(&section, data, &config, None, false, 0.5);
+
+        assert!(strings.is_empty());
+    }
+
+    #[test]
+    fn test_extraction_config_default() {
+        let config = AsciiExtractionConfig::default();
+        assert_eq!(config.min_length, 4);
+        assert_eq!(config.max_length, None);
+    }
+
+    #[test]
+    fn test_extraction_config_new() {
+        let config = AsciiExtractionConfig::new(8);
+        assert_eq!(config.min_length, 8);
+        assert_eq!(config.max_length, None);
+    }
+
+    #[test]
+    fn test_extraction_config_custom_max_length() {
+        let config = AsciiExtractionConfig {
+            max_length: Some(256),
+            ..Default::default()
+        };
+        assert_eq!(config.min_length, 4);
+        assert_eq!(config.max_length, Some(256));
+    }
+}
diff --git a/src/extraction/config.rs b/src/extraction/config.rs
new file mode 100644
index 0000000..af2f678
--- /dev/null
+++ b/src/extraction/config.rs
@@ -0,0 +1,221 @@
+//! Extraction Configuration Module
+//!
+//! This module provides configuration structures for controlling string extraction
+//! and noise filtering behavior. It allows fine-tuning of thresholds, filter weights,
+//! and extraction parameters.
+
+/// Configuration for noise filtering heuristics
+///
+/// Controls thresholds and parameters for the various noise detection filters.
+/// All fields are public; call `validate()` after customizing them to check ranges.
+///
+/// # Example
+///
+/// ```rust
+/// use stringy::extraction::config::NoiseFilterConfig;
+///
+/// // Use default configuration
+/// let config = NoiseFilterConfig::default();
+///
+/// // Customize thresholds
+/// let mut config = NoiseFilterConfig::default();
+/// config.entropy_min = 2.0;
+/// config.entropy_max = 7.0;
+/// ```
+#[derive(Debug, Clone)]
+pub struct NoiseFilterConfig {
+    /// Minimum entropy threshold in bits per byte (default: 1.5)
+    ///
+    /// Strings below this are likely padding or repetition. Valid range: 0.0 to 8.0.
+    pub entropy_min: f32,
+    /// Maximum entropy threshold in bits per byte (default: 7.5)
+    ///
+    /// Strings above this are likely random binary data. Must exceed `entropy_min`.
+    pub entropy_max: f32,
+    /// Maximum string length before applying penalty (default: 200)
+    ///
+    /// Very long strings are often table data or other structured content. Must be > 0.
+    pub max_length: usize,
+    /// Maximum ratio of repeated characters (default: 0.7)
+    ///
+    /// Strings with higher repetition ratios are likely padding or noise. Range: 0.0 to 1.0.
+    pub max_repetition_ratio: f32,
+    /// Minimum vowel ratio for linguistic filter (default: 0.1)
+    ///
+    /// Used to detect consonant-heavy strings that may be noise. Range: 0.0 to 1.0.
+    pub min_vowel_ratio: f32,
+    /// Maximum vowel ratio for linguistic filter (default: 0.9)
+    ///
+    /// Used to detect vowel-heavy strings that may be noise. Must exceed `min_vowel_ratio`.
+    pub max_vowel_ratio: f32,
+    /// Weights for combining filter scores (default: `FilterWeights::default()`)
+    pub filter_weights: FilterWeights,
+}
+
+impl Default for NoiseFilterConfig {
+    fn default() -> Self {
+        NoiseFilterConfig {
+            filter_weights: FilterWeights::default(),
+            min_vowel_ratio: 0.1,
+            max_vowel_ratio: 0.9,
+            max_repetition_ratio: 0.7,
+            max_length: 200,
+            entropy_min: 1.5, // bits per byte
+            entropy_max: 7.5, // bits per byte
+        }
+    }
+}
+
+impl NoiseFilterConfig {
+    /// Validate the configuration
+    ///
+    /// Returns an error if any threshold is out of range, NaN, or inverted (min >= max).
+    pub fn validate(&self) -> Result<(), String> {
+        if !(0.0..=8.0).contains(&self.entropy_min) {
+            return Err("entropy_min must be between 0.0 and 8.0".to_string());
+        }
+        if !(0.0..=8.0).contains(&self.entropy_max) {
+            return Err("entropy_max must be between 0.0 and 8.0".to_string());
+        }
+        if self.entropy_min >= self.entropy_max {
+            return Err("entropy_min must be less than entropy_max".to_string());
+        }
+        if self.max_length == 0 {
+            return Err("max_length must be greater than 0".to_string());
+        }
+        if !(0.0..=1.0).contains(&self.max_repetition_ratio) {
+            return Err("max_repetition_ratio must be between 0.0 and 1.0".to_string());
+        }
+        if !(0.0..=1.0).contains(&self.min_vowel_ratio) {
+            return Err("min_vowel_ratio must be between 0.0 and 1.0".to_string());
+        }
+        if !(0.0..=1.0).contains(&self.max_vowel_ratio) {
+            return Err("max_vowel_ratio must be between 0.0 and 1.0".to_string());
+        }
+        if self.min_vowel_ratio >= self.max_vowel_ratio {
+            return Err("min_vowel_ratio must be less than max_vowel_ratio".to_string());
+        }
+        self.filter_weights.validate()?;
+        Ok(())
+    }
+}
+
+/// Weights for combining multiple filter confidence scores
+///
+/// These weights control how individual filter scores are combined into
+/// an overall confidence assessment. `validate()` requires a sum within 0.01 of 1.0.
+///
+/// # Example
+///
+/// ```rust
+/// use stringy::extraction::config::FilterWeights;
+///
+/// // Use default weights
+/// let weights = FilterWeights::default();
+///
+/// // Customize weights (must sum to 1.0)
+/// let weights = FilterWeights {
+///     entropy_weight: 0.3,
+///     char_distribution_weight: 0.25,
+///     linguistic_weight: 0.2,
+///     length_weight: 0.15,
+///     repetition_weight: 0.05,
+///     context_weight: 0.05,
+/// };
+/// ```
+#[derive(Debug, Clone)]
+pub struct FilterWeights {
+    /// Weight for entropy filter (default: 0.25)
+    pub entropy_weight: f32,
+    /// Weight for character distribution filter (default: 0.20)
+    pub char_distribution_weight: f32,
+    /// Weight for linguistic pattern filter (default: 0.20)
+    pub linguistic_weight: f32,
+    /// Weight for length filter (default: 0.15)
+    pub length_weight: f32,
+    /// Weight for repetition filter (default: 0.10)
+    pub repetition_weight: f32,
+    /// Weight for context-aware filter (default: 0.10)
+    pub context_weight: f32,
+}
+
+impl Default for FilterWeights {
+    fn default() -> Self {
+        Self {
+            entropy_weight: 0.25,
+            char_distribution_weight: 0.20,
+            linguistic_weight: 0.20,
+            length_weight: 0.15,
+            repetition_weight: 0.10,
+            context_weight: 0.10,
+        }
+    }
+}
+
+impl FilterWeights {
+    /// Validate that weights sum to 1.0
+    ///
+    /// Errors if the sum is NaN (a plain `>` check would accept it) or is off by more than 0.01.
+    pub fn validate(&self) -> Result<(), String> {
+        let sum = self.entropy_weight
+            + self.char_distribution_weight
+            + self.linguistic_weight
+            + self.length_weight
+            + self.repetition_weight
+            + self.context_weight;
+        if sum.is_nan() || (sum - 1.0).abs() > 0.01 {
+            return Err(format!("Filter weights must sum to 1.0, got {}", sum));
+        }
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_noise_filter_config_default() {
+        let config = NoiseFilterConfig::default();
+        assert_eq!(config.entropy_min, 1.5);
+        assert_eq!(config.entropy_max, 7.5);
+        assert_eq!(config.max_length, 200);
+        assert_eq!(config.max_repetition_ratio, 0.7);
+    }
+
+    #[test]
+    fn test_noise_filter_config_validate() {
+        let mut config = NoiseFilterConfig::default();
+        assert!(config.validate().is_ok());
+
+        config.entropy_min = 8.0; // within 0.0..=8.0 but not below entropy_max (7.5)
+        assert!(config.validate().is_err());
+
+        config.entropy_min = 1.5;
+        config.entropy_max = 1.0; // max now below min (1.5)
+        assert!(config.validate().is_err());
+    }
+
+    #[test]
+    fn test_filter_weights_default() {
+        let weights = FilterWeights::default();
+        assert_eq!(weights.entropy_weight, 0.25);
+        assert_eq!(weights.char_distribution_weight, 0.20);
+        assert_eq!(weights.linguistic_weight, 0.20);
+        assert_eq!(weights.length_weight, 0.15);
+        assert_eq!(weights.repetition_weight, 0.10);
+        assert_eq!(weights.context_weight, 0.10);
+    }
+
+    #[test]
+    fn test_filter_weights_validate() {
+        let weights = FilterWeights::default();
+        assert!(weights.validate().is_ok());
+
+        let bad_weights = FilterWeights {
+            entropy_weight: 0.5, // pushes the sum to 1.25
+            ..Default::default()
+        };
+        assert!(bad_weights.validate().is_err());
+    }
+}
diff --git a/src/extraction/filters.rs b/src/extraction/filters.rs
new file mode 100644
index 0000000..a7e7ab1
--- /dev/null
+++ b/src/extraction/filters.rs
@@ -0,0 +1,702 @@
+//! Noise Filtering Module
+//!
+//! This module provides multi-layered heuristic filters for detecting and filtering
+//! noise in extracted strings. It uses a combination of entropy analysis, character
+//! distribution, linguistic patterns, length checks, repetition detection, and
+//! context-aware filtering to assign confidence scores to strings.
+
+use crate::extraction::config::{FilterWeights, NoiseFilterConfig};
+use crate::types::{SectionInfo, SectionType};
+
+/// Context information for noise filtering
+///
+/// Provides section metadata and surrounding context to help filters make
+/// informed decisions about string legitimacy.
+#[derive(Debug, Clone)]
+pub struct FilterContext {
+    /// Section type where the string was found
+    pub section_type: SectionType,
+    /// Section name
+    pub section_name: Option<String>,
+    /// Section weight (higher = more likely to contain strings)
+    pub section_weight: f32,
+    /// Whether the section is executable
+    pub is_executable: bool,
+    /// Whether the section is writable
+    pub is_writable: bool,
+    /// Surrounding bytes for context (optional, for future use; both constructors set `None`)
+    pub surrounding_bytes: Option<Vec<u8>>,
+}
+
+impl Default for FilterContext {
+    fn default() -> Self {
+        Self {
+            section_type: SectionType::Other,
+            section_name: None,
+            section_weight: 0.5,
+            is_executable: false,
+            is_writable: false,
+            surrounding_bytes: None,
+        }
+    }
+}
+
+impl FilterContext {
+    /// Build a FilterContext by copying a SectionInfo's type, name, weight and permission flags
+    pub fn from_section(section: &SectionInfo) -> Self {
+        Self {
+            section_type: section.section_type,
+            section_name: Some(section.name.clone()),
+            section_weight: section.weight,
+            is_executable: section.is_executable,
+            is_writable: section.is_writable,
+            surrounding_bytes: None, // not populated from SectionInfo
+        }
+    }
+}
+
+/// Trait for noise filters that calculate confidence scores
+///
+/// Each filter implements this trait to provide a confidence score (0.0-1.0)
+/// indicating how likely a string is to be legitimate vs noise.
+pub trait NoiseFilter {
+    /// Calculate confidence score for a string
+    ///
+    /// Returns a value between 0.0 (definitely noise) and 1.0 (definitely legitimate).
+    ///
+    /// # Arguments
+    ///
+    /// * `text` - The string text to analyze
+    /// * `context` - Where the string was found; filters that only inspect `text` ignore it
+    ///
+    /// # Returns
+    ///
+    /// Confidence score between 0.0 and 1.0
+    fn calculate_confidence(&self, text: &str, context: &FilterContext) -> f32;
+}
+
+/// Character distribution filter
+///
+/// Scores a string by how its characters are distributed, flagging as noise:
+/// - Excessive punctuation (>80%)
+/// - Excessive repetition of same character (>90%)
+/// - Excessive non-alphanumeric characters (>70%)
+pub struct CharDistributionFilter;
+
+impl NoiseFilter for CharDistributionFilter {
+    fn calculate_confidence(&self, text: &str, _context: &FilterContext) -> f32 {
+        // An empty string offers nothing to measure
+        if text.is_empty() {
+            return 0.0;
+        }
+
+        // Single pass over the characters: length, class tallies and a histogram
+        let mut length = 0usize;
+        let mut punct = 0usize;
+        let mut alnum = 0usize;
+        let mut histogram = std::collections::HashMap::new();
+        for ch in text.chars() {
+            length += 1;
+            // ASCII punctuation only; alphanumeric uses the Unicode definition
+            punct += usize::from(ch.is_ascii_punctuation());
+            alnum += usize::from(ch.is_alphanumeric());
+            *histogram.entry(ch).or_insert(0usize) += 1;
+        }
+
+        // Ratios are fractions of the character count (not the byte length)
+        let total = length as f32;
+        let punctuation_ratio = punct as f32 / total;
+        let most_common = histogram.values().copied().max().unwrap_or(0);
+        let max_char_ratio = most_common as f32 / total;
+        let non_alphanumeric_ratio = 1.0 - (alnum as f32 / total);
+
+        // Hard rejections, tested in this order
+        if punctuation_ratio > 0.8 {
+            return 0.2; // mostly punctuation
+        }
+
+        if max_char_ratio > 0.9 {
+            return 0.1; // a single character dominates (likely padding)
+        }
+
+        if non_alphanumeric_ratio > 0.7 {
+            return 0.3; // mostly non-alphanumeric characters
+        }
+
+        // Full confidence only when every ratio is comfortably low;
+        // anything in between the two sets of limits gets a moderate score
+        let well_balanced =
+            punctuation_ratio < 0.3 && max_char_ratio < 0.5 && non_alphanumeric_ratio < 0.4;
+        if well_balanced {
+            1.0 // High confidence
+        } else {
+            0.7 // Moderate confidence
+        }
+    }
+}
+
+/// Entropy-based filter
+///
+/// Scores strings by Shannon entropy: values below `entropy_min` or above
+/// `entropy_max` are penalized; 3.5-6.0 bits/byte is treated as the optimal range.
+pub struct EntropyFilter {
+    /// Minimum entropy threshold
+    pub entropy_min: f32,
+    /// Maximum entropy threshold
+    pub entropy_max: f32,
+}
+
+impl EntropyFilter {
+    /// Build an EntropyFilter from explicit lower and upper thresholds
+    pub fn new(entropy_min: f32, entropy_max: f32) -> Self {
+        EntropyFilter {
+            entropy_min,
+            entropy_max,
+        }
+    }
+}
+
+impl NoiseFilter for EntropyFilter {
+    fn calculate_confidence(&self, text: &str, _context: &FilterContext) -> f32 {
+        if text.is_empty() {
+            return 0.0;
+        }
+
+        // Shannon entropy of the string's UTF-8 bytes
+        let bits = entropy::shannon_entropy(text.as_bytes());
+
+        // Below the configured minimum: likely padding or repetition
+        if bits < self.entropy_min {
+            return 0.1;
+        }
+
+        // Above the configured maximum: likely random binary
+        if bits > self.entropy_max {
+            return 0.2;
+        }
+
+        // Within the configured window, grade by fixed bands
+        if !(2.0..=7.0).contains(&bits) {
+            0.4 // Lower confidence (outside 2.0-7.0)
+        } else if bits < 3.5 {
+            0.7 // Moderate confidence (2.0-3.5: low but acceptable)
+        } else if bits <= 6.0 {
+            1.0 // High confidence (3.5-6.0: optimal range for text)
+        } else {
+            0.6 // Moderate confidence (6.0-7.0: high but acceptable)
+        }
+    }
+}
+
+/// Linguistic pattern filter
+///
+/// Detects word-like patterns by analyzing vowel-to-consonant ratios and
+/// common English bigrams. Only ASCII letters are counted; other characters are ignored.
+pub struct LinguisticFilter {
+    /// Minimum vowel ratio
+    pub min_vowel_ratio: f32,
+    /// Maximum vowel ratio
+    pub max_vowel_ratio: f32,
+}
+
+impl LinguisticFilter {
+    /// Create a new LinguisticFilter with custom thresholds
+    pub fn new(min_vowel_ratio: f32, max_vowel_ratio: f32) -> Self {
+        Self {
+            min_vowel_ratio,
+            max_vowel_ratio,
+        }
+    }
+}
+
+impl NoiseFilter for LinguisticFilter {
+    /// Scores `text` from the share of vowels among its ASCII letters.
+    ///
+    /// Returns 0.0 for empty input, 0.6 when there are no ASCII letters, 0.5 or
+    /// 0.3 when the vowel ratio is below/above the configured limits, and
+    /// otherwise 1.0, 0.7 or 0.4 depending on the ratio and on whether a common
+    /// English bigram is present.
+    fn calculate_confidence(&self, text: &str, _context: &FilterContext) -> f32 {
+        if text.is_empty() {
+            return 0.0;
+        }
+
+        // A non-empty `&str` always holds at least one character, so no second
+        // emptiness check on the character count is needed.
+
+        // Tally ASCII vowels and consonants, case-insensitively. Everything else
+        // (digits, symbols, whitespace, non-ASCII letters) is ignored.
+        let mut vowel_count = 0;
+        let mut consonant_count = 0;
+        for ch in text.chars() {
+            match ch.to_ascii_lowercase() {
+                'a' | 'e' | 'i' | 'o' | 'u' => vowel_count += 1,
+                // Vowels were matched above, so the rest of a-z are consonants
+                'b'..='z' => consonant_count += 1,
+                _ => {} // Ignore non-letters
+            }
+        }
+
+        let letter_count = vowel_count + consonant_count;
+        if letter_count == 0 {
+            // No letters, check for numbers/symbols
+            // Strings with only numbers/symbols might still be legitimate
+            return 0.6;
+        }
+
+        let vowel_ratio = vowel_count as f32 / letter_count as f32;
+
+        // Check vowel ratio against the configured limits
+        if vowel_ratio < self.min_vowel_ratio {
+            // Consonant-heavy (might be noise or non-English)
+            return 0.5;
+        }
+        if vowel_ratio > self.max_vowel_ratio {
+            // Vowel-heavy (likely noise)
+            return 0.3;
+        }
+
+        // Check for common English bigrams; one hit is enough
+        const COMMON_BIGRAMS: [&str; 10] =
+            ["th", "he", "in", "er", "an", "re", "on", "at", "en", "nd"];
+        let text_lower = text.to_ascii_lowercase();
+        let has_common_bigram = COMMON_BIGRAMS
+            .iter()
+            .any(|bigram| text_lower.contains(*bigram));
+
+        // Good vowel ratio and some common bigrams. The 0.1..=0.9 band is fixed,
+        // so the final 0.4 is only reachable with limits looser than 0.1 / 0.9.
+        if (0.2..=0.8).contains(&vowel_ratio) && has_common_bigram {
+            1.0 // High confidence
+        } else if (0.1..=0.9).contains(&vowel_ratio) {
+            0.7 // Moderate confidence
+        } else {
+            0.4 // Lower confidence
+        }
+    }
+}
+
+/// Length-based filter
+///
+/// Penalizes excessively long strings (likely table data) and very short
+/// strings in low-weight sections.
+pub struct LengthFilter {
+    /// Maximum length before penalty
+    pub max_length: usize,
+}
+
+impl LengthFilter {
+    /// Build a LengthFilter that penalizes strings longer than `max_length`
+    pub fn new(max_length: usize) -> Self {
+        LengthFilter { max_length }
+    }
+}
+
+impl NoiseFilter for LengthFilter {
+    /// Scores `text` by its length in bytes (`str::len`), not in characters.
+    ///
+    /// `context.section_weight` is only consulted for strings shorter than 4 bytes.
+    fn calculate_confidence(&self, text: &str, context: &FilterContext) -> f32 {
+        let len = text.len();
+        let low_weight_section = context.section_weight < 0.5;
+
+        // Arms are tried top to bottom, which fixes the priority of the checks
+        match len {
+            // Excessively long strings are likely table data
+            n if n > self.max_length => 0.3,
+            // Very short strings in low-weight sections are suspicious
+            n if n < 4 && low_weight_section => 0.5,
+            // Normal length strings
+            4..=100 => 1.0,
+            // Long but acceptable: above 100 and no longer than max_length
+            n if n > 100 => 0.7,
+            // Remaining case: shorter than 4 outside a low-weight section
+            _ => 0.6,
+        }
+
+    }
+}
+
+/// Repetition detection filter
+///
+/// Detects repeated character patterns (e.g., "AAAA", "0000") and
+/// repeated leading patterns (e.g., "abcabcabc").
+pub struct RepetitionFilter {
+    /// Maximum ratio of repeated characters
+    pub max_repetition_ratio: f32,
+}
+
+impl RepetitionFilter {
+    /// Create a new RepetitionFilter with custom threshold
+    pub fn new(max_repetition_ratio: f32) -> Self {
+        Self {
+            max_repetition_ratio,
+        }
+    }
+}
+
+impl NoiseFilter for RepetitionFilter {
+    /// Scores `text` by how repetitive it is.
+    ///
+    /// Returns:
+    /// - 0.0 for empty input
+    /// - 0.1 when one character makes up more than `max_repetition_ratio` of it
+    /// - 0.2 when a string of 6+ characters starts with a pattern of up to 16
+    ///   characters repeated three times in a row
+    /// - 1.0 otherwise
+    fn calculate_confidence(&self, text: &str, _context: &FilterContext) -> f32 {
+        if text.is_empty() {
+            return 0.0;
+        }
+
+        // Work on chars so a multi-byte character counts once
+        let chars: Vec<char> = text.chars().collect();
+        let total = chars.len() as f32;
+
+        // Check for repeated characters: how much of the string is taken up by
+        // its single most frequent character
+        let mut char_counts = std::collections::HashMap::new();
+        for &ch in &chars {
+            *char_counts.entry(ch).or_insert(0) += 1;
+        }
+
+        let max_char_count = char_counts.values().max().copied().unwrap_or(0) as f32;
+        let max_char_ratio = max_char_count / total;
+
+        if max_char_ratio > self.max_repetition_ratio {
+            return 0.1; // Very low confidence (likely padding)
+        }
+
+        // Check for a repeated leading pattern, e.g. "abcabcabc...".
+        // Only patterns anchored at the start of the string are detected.
+        //
+        // A pattern of length p can only occur three times back-to-back when
+        // 3 * p <= len, so len / 3 is the largest pattern length worth trying.
+        // It is further capped at 16 so long strings stay cheap to scan.
+        //
+        // There is deliberately no extra early exit based on ceil(len / 3):
+        // that value exceeds the cap whenever len is not a multiple of 3 or is
+        // greater than 48, which would skip the scan for inputs such as
+        // "abababab" and let them score 1.0.
+        let max_pattern_len = (chars.len() / 3).min(16);
+
+        // Strings shorter than 6 characters are not scanned for patterns
+        if chars.len() >= 6 {
+            for pattern_len in 1..=max_pattern_len {
+                let pattern = &chars[..pattern_len];
+
+                // 3 * pattern_len <= len guarantees at least three full chunks,
+                // so `all` really inspects the first three consecutive windows.
+                let repeats_three_times = chars
+                    .chunks_exact(pattern_len)
+                    .take(3)
+                    .all(|chunk| chunk == pattern);
+
+                if repeats_three_times {
+                    return 0.2; // Low confidence (repetitive pattern)
+                }
+            }
+        }
+
+        // No significant repetition
+        1.0
+    }
+}
+
+/// Context-aware filter
+///
+/// Boosts confidence for strings in high-weight sections (.rodata, .rdata, __cstring)
+/// and reduces confidence for strings in code sections. Considers section permissions.
+pub struct ContextFilter;
+
+impl NoiseFilter for ContextFilter {
+    /// Scores a string from its section alone; the text itself is not inspected.
+    ///
+    /// Scores by section type:
+    /// - `StringData`: 1.0 if neither executable nor writable, otherwise 0.9
+    /// - `ReadOnlyData`: 0.9 if not executable, otherwise 0.7
+    /// - `Resources`: 1.0
+    /// - `Code`: 0.3 if `section_weight` < 0.3, otherwise 0.5
+    /// - `WritableData`: 0.6
+    /// - `Debug`: 0.5
+    /// - `Other`: 0.7, 0.5 or 0.3 for `section_weight` > 0.7, > 0.4, or lower
+    ///
+    /// `is_writable` only matters for `StringData`; `is_executable` for the first two.
+    fn calculate_confidence(&self, _text: &str, context: &FilterContext) -> f32 {
+        // Local shorthands for the fields consulted below
+        let weight = context.section_weight;
+        let executable = context.is_executable;
+        let writable = context.is_writable;
+
+        match context.section_type {
+            // .rodata, .rdata, __cstring - very likely to contain strings;
+            // full score only for a read-only, non-executable section
+            SectionType::StringData if !executable && !writable => 1.0,
+            SectionType::StringData => 0.9,
+
+            // Read-only data sections, penalized when executable
+            SectionType::ReadOnlyData if !executable => 0.9,
+            SectionType::ReadOnlyData => 0.7,
+
+            // PE resource sections - resources are known-good sources
+            SectionType::Resources => 1.0,
+
+            // Code sections - less likely to contain strings, least of all
+            // when the section weight is low
+            SectionType::Code if weight < 0.3 => 0.3,
+            SectionType::Code => 0.5,
+
+            // Writable data sections - moderate confidence
+            SectionType::WritableData => 0.6,
+
+            // Debug sections - may contain strings but lower confidence
+            SectionType::Debug => 0.5,
+
+            // Unknown sections - use section weight as guide
+            SectionType::Other if weight > 0.7 => 0.7,
+            SectionType::Other if weight > 0.4 => 0.5,
+            SectionType::Other => 0.3,
+        }
+    }
+}
+
+/// Composite noise filter
+///
+/// Combines multiple filters with configurable weights to produce an overall
+/// confidence score. Each filter can be switched off through its public `enable_*` flag.
+pub struct CompositeNoiseFilter {
+    /// Entropy filter
+    pub entropy_filter: EntropyFilter,
+    /// Character distribution filter
+    pub char_distribution_filter: CharDistributionFilter,
+    /// Linguistic filter
+    pub linguistic_filter: LinguisticFilter,
+    /// Length filter
+    pub length_filter: LengthFilter,
+    /// Repetition filter
+    pub repetition_filter: RepetitionFilter,
+    /// Context filter
+    pub context_filter: ContextFilter,
+    /// Per-filter weights applied when the individual scores are combined
+    pub weights: FilterWeights,
+    /// Whether to enable entropy filter
+    pub enable_entropy: bool,
+    /// Whether to enable character distribution filter
+    pub enable_char_distribution: bool,
+    /// Whether to enable linguistic filter
+    pub enable_linguistic: bool,
+    /// Whether to enable length filter
+    pub enable_length: bool,
+    /// Whether to enable repetition filter
+    pub enable_repetition: bool,
+    /// Whether to enable context filter
+    pub enable_context: bool,
+}
+
+impl CompositeNoiseFilter {
+    /// Create a CompositeNoiseFilter from `config`, with every filter enabled
+    pub fn new(config: &NoiseFilterConfig) -> Self {
+        let linguistic_filter =
+            LinguisticFilter::new(config.min_vowel_ratio, config.max_vowel_ratio);
+        CompositeNoiseFilter {
+            entropy_filter: EntropyFilter::new(config.entropy_min, config.entropy_max),
+            char_distribution_filter: CharDistributionFilter,
+            linguistic_filter,
+            length_filter: LengthFilter::new(config.max_length),
+            repetition_filter: RepetitionFilter::new(config.max_repetition_ratio),
+            context_filter: ContextFilter,
+            weights: config.filter_weights.clone(),
+            // Every filter starts enabled; the flags are public fields
+            enable_entropy: true,
+            enable_char_distribution: true,
+            enable_linguistic: true,
+            enable_length: true,
+            enable_repetition: true,
+            enable_context: true,
+        }
+    }
+
+    /// Calculate overall confidence score by combining all enabled filters
+    ///
+    /// Each enabled filter's score is multiplied by its weight; the weighted sum is
+    /// divided by the sum of the weights that were used, so disabling filters
+    /// rescales the remaining ones. Returns 0.5 when that weight sum is not
+    /// greater than zero (e.g. every filter is disabled).
+    /// The result is not clamped.
+    pub fn calculate_confidence(&self, text: &str, context: &FilterContext) -> f32 {
+        let weights = &self.weights;
+        let mut weighted_sum = 0.0;
+        let mut total_weight = 0.0;
+
+        // Adds one filter's weighted score and tracks the weight actually used
+        let mut accumulate = |filter: &dyn NoiseFilter, weight: f32| {
+            weighted_sum += filter.calculate_confidence(text, context) * weight;
+            total_weight += weight;
+        };
+
+        // Filters are visited in a fixed order; disabled ones are never evaluated
+        if self.enable_entropy {
+            accumulate(&self.entropy_filter, weights.entropy_weight);
+        }
+
+        if self.enable_char_distribution {
+            accumulate(&self.char_distribution_filter, weights.char_distribution_weight);
+        }
+
+        if self.enable_linguistic {
+            accumulate(&self.linguistic_filter, weights.linguistic_weight);
+        }
+
+        if self.enable_length {
+            accumulate(&self.length_filter, weights.length_weight);
+        }
+
+        if self.enable_repetition {
+            accumulate(&self.repetition_filter, weights.repetition_weight);
+        }
+
+        if self.enable_context {
+            accumulate(&self.context_filter, weights.context_weight);
+        }
+
+        // Normalize by total weight (in case some filters are disabled)
+        if total_weight > 0.0 {
+            weighted_sum / total_weight
+        } else {
+            0.5 // Default if all filters disabled
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_char_distribution_filter() {
+        let filter = CharDistributionFilter;
+        let context = FilterContext::default();
+
+        // Normal text
+        assert!(filter.calculate_confidence("Hello, World!", &context) > 0.7);
+
+        // Excessive punctuation (all 12 characters are punctuation)
+        assert!(filter.calculate_confidence("!!!@@@###$$$", &context) < 0.5);
+
+        // Repeated character (one character is 100% of the string)
+        assert!(filter.calculate_confidence("AAAA", &context) < 0.5);
+    }
+
+    #[test]
+    fn test_entropy_filter() {
+        let filter = EntropyFilter::new(1.5, 7.5);
+        let context = FilterContext::default();
+
+        // Normal text
+        assert!(filter.calculate_confidence("Hello, World!", &context) > 0.5);
+
+        // Low entropy (a single repeated byte)
+        assert!(filter.calculate_confidence("AAAA", &context) < 0.5);
+
+        // Many distinct characters, each used once, give a high but not extreme entropy.
+        // NOTE(review): that is roughly 6.5 bits/byte, below the 7.5 maximum, so this
+        // exercises the fixed 6.0-7.0 band rather than the entropy_max cutoff - confirm intent
+        let random = "!@#$%^&*()_+-=[]{}|;':\",./<>?`~abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+        let random_confidence = filter.calculate_confidence(random, &context);
+        // High entropy strings should have lower confidence than normal text
+        let normal_confidence = filter.calculate_confidence("Hello, World!", &context);
+        assert!(
+            random_confidence < normal_confidence,
+            "High entropy string should have lower confidence than normal text (random: {}, normal: {})",
+            random_confidence,
+            normal_confidence
+        );
+    }
+
+    #[test]
+    fn test_linguistic_filter() {
+        let filter = LinguisticFilter::new(0.1, 0.9);
+        let context = FilterContext::default();
+
+        // Normal English text
+        assert!(filter.calculate_confidence("Hello world", &context) > 0.7);
+
+        // Consonant-heavy (no vowels at all)
+        assert!(filter.calculate_confidence("bcdfghjklmnpqrstvwxyz", &context) < 0.7);
+
+        // Vowel-heavy (vowels only)
+        assert!(filter.calculate_confidence("aeiouaeiou", &context) < 0.7);
+    }
+
+    #[test]
+    fn test_length_filter() {
+        let filter = LengthFilter::new(200);
+        let context = FilterContext::default();
+
+        // Normal length
+        assert!(filter.calculate_confidence("Hello", &context) > 0.7);
+
+        // Very long (300 > max_length of 200)
+        let long_string = "A".repeat(300);
+        assert!(filter.calculate_confidence(&long_string, &context) < 0.5);
+
+        // Very short in low-weight section
+        let low_weight_context = FilterContext {
+            section_weight: 0.3,
+            ..Default::default()
+        };
+        assert!(filter.calculate_confidence("Hi", &low_weight_context) < 0.7);
+    }
+
+    #[test]
+    fn test_repetition_filter() {
+        let filter = RepetitionFilter::new(0.7);
+        let context = FilterContext::default();
+
+        // Normal text
+        assert!(filter.calculate_confidence("Hello", &context) > 0.7);
+
+        // Repeated characters
+        assert!(filter.calculate_confidence("AAAA", &context) < 0.5);
+
+        // Repeated pattern ("abc" three times)
+        assert!(filter.calculate_confidence("abcabcabc", &context) < 0.5);
+    }
+
+    #[test]
+    fn test_context_filter() {
+        let filter = ContextFilter;
+
+        // String data section (read-only, non-executable)
+        let context = FilterContext {
+            section_type: SectionType::StringData,
+            is_executable: false,
+            is_writable: false,
+            ..Default::default()
+        };
+        assert!(filter.calculate_confidence("test", &context) > 0.8);
+
+        // Code section with a low weight
+        let context = FilterContext {
+            section_type: SectionType::Code,
+            section_weight: 0.1,
+            ..Default::default()
+        };
+        assert!(filter.calculate_confidence("test", &context) < 0.5);
+    }
+
+    #[test]
+    fn test_composite_filter() {
+        let config = NoiseFilterConfig::default();
+        let filter = CompositeNoiseFilter::new(&config);
+        let context = FilterContext::default();
+
+        // Normal text should have high confidence
+        let score = filter.calculate_confidence("Hello, World!", &context);
+        assert!(score > 0.5);
+
+        // Noise should score lower than the normal text above
+        let noise_score = filter.calculate_confidence("AAAA", &context);
+        assert!(noise_score < score);
+    }
+}
diff --git a/src/extraction/macho_load_commands.rs b/src/extraction/macho_load_commands.rs
index c344bbb..35a3254 100644
--- a/src/extraction/macho_load_commands.rs
+++ b/src/extraction/macho_load_commands.rs
@@ -108,6 +108,7 @@ fn extract_dylib_strings(macho: &MachO) -> Vec<FoundString> {
             rva: None,
             length,
             score: 0,
+            confidence: 1.0,
         });
     }
 
@@ -136,6 +137,7 @@ fn extract_rpath_strings(macho: &MachO) -> Vec<FoundString> {
             rva: None,
             length,
             score: 0,
+            confidence: 1.0,
         });
     }
 
diff --git a/src/extraction/mod.rs b/src/extraction/mod.rs
index 91c99cb..9769b75 100644
--- a/src/extraction/mod.rs
+++ b/src/extraction/mod.rs
@@ -4,6 +4,19 @@
 //! Each extractor is designed to work with a specific binary format and leverage
 //! format-specific knowledge to extract meaningful strings.
 //!
+//! ## Core String Extraction Framework
+//!
+//! The core extraction framework provides a trait-based architecture for extracting
+//! strings from binary data:
+//!
+//! - `StringExtractor`: Trait defining extraction methods
+//! - `ExtractionConfig`: Configuration for controlling extraction behavior
+//! - `BasicExtractor`: Sequential ASCII/UTF-8 string scanner implementation
+//!
+//! **Note**: `StringExtractor`, `ExtractionConfig`, and `BasicExtractor` are defined
+//! directly in this module, so `extraction/mod.rs` itself needs no `use` for them.
+//! Downstream code should import them from `stringy::extraction` or `stringy` (via re-exports).
+//!
 //! ## PE Resource String Extraction (Phase 2 Complete)
 //!
 //! The PE resource extraction module now provides comprehensive string extraction:
@@ -11,6 +24,30 @@
 //! - `extract_resources()`: Returns resource metadata (Phase 1)
 //! - `extract_resource_strings()`: Returns actual strings from resources (Phase 2)
 //!
+//! ## ASCII String Extraction
+//!
+//! The ASCII extraction module provides foundational encoding extraction for StringyMcStringFace.
+//! It implements byte-level scanning for contiguous printable ASCII sequences and serves as the
+//! reference implementation for future UTF-8, UTF-16LE, and UTF-16BE extractors.
+//!
+//! - `extract_ascii_strings()`: Basic byte-level ASCII string scanning
+//! - `extract_from_section()`: Section-aware extraction with proper metadata population
+//! - `AsciiExtractionConfig`: Configuration for minimum/maximum length filtering
+//!
+//! ### ASCII Extraction Example
+//!
+//! ```rust
+//! use stringy::extraction::ascii::{extract_ascii_strings, AsciiExtractionConfig};
+//!
+//! let data = b"Hello\0World\0Test123";
+//! let config = AsciiExtractionConfig::default();
+//! let strings = extract_ascii_strings(data, &config);
+//!
+//! for string in strings {
+//!     println!("Found: {} at offset {}", string.text, string.offset);
+//! }
+//! ```
+//!
 //! ## Mach-O Load Command String Extraction
 //!
 //! The Mach-O load command extraction module extracts library dependencies and runtime
@@ -22,23 +59,1116 @@
 //! # Example
 //!
 //! ```rust
-//! use stringy::extraction::{extract_resources, extract_resource_strings, extract_load_command_strings};
+//! use stringy::extraction::{BasicExtractor, ExtractionConfig, StringExtractor};
+//! use stringy::container::{detect_format, create_parser};
+//!
+//! let data = std::fs::read("example.exe")?;
+//! let format = detect_format(&data);
+//! let parser = create_parser(format)?;
+//! let container_info = parser.parse(&data)?;
+//!
+//! let extractor = BasicExtractor::new();
+//! let config = ExtractionConfig::default();
+//! let strings = extractor.extract(&data, &container_info, &config)?;
 //!
-//! let pe_data = std::fs::read("example.exe")?;
+//! // Format-specific extractors
+//! use stringy::extraction::{
+//!     extract_ascii_strings, extract_load_command_strings, extract_resources,
+//!     extract_resource_strings, AsciiExtractionConfig,
+//! };
+//!
+//! // ASCII extraction
+//! let ascii_config = AsciiExtractionConfig::default();
+//! let ascii_strings = extract_ascii_strings(&data, &ascii_config);
 //!
 //! // Phase 1: Get resource metadata
-//! let metadata = extract_resources(&pe_data);
+//! let metadata = extract_resources(&data);
 //!
 //! // Phase 2: Extract actual strings from resources
-//! let strings = extract_resource_strings(&pe_data);
+//! let resource_strings = extract_resource_strings(&data);
 //!
 //! // Mach-O load command extraction
 //! let macho_data = std::fs::read("example.dylib")?;
 //! let load_command_strings = extract_load_command_strings(&macho_data);
 //! ```
 
+use crate::types::{
+    ContainerInfo, Encoding, FoundString, Result, SectionInfo, SectionType, StringSource,
+};
+
+pub mod ascii;
+pub mod config;
+pub mod filters;
 pub mod macho_load_commands;
 pub mod pe_resources;
 
+pub use ascii::{AsciiExtractionConfig, extract_ascii_strings, extract_from_section};
+pub use config::{FilterWeights, NoiseFilterConfig};
+pub use filters::{CompositeNoiseFilter, FilterContext, NoiseFilter};
 pub use macho_load_commands::extract_load_command_strings;
 pub use pe_resources::{extract_resource_strings, extract_resources};
+
+/// Configuration for string extraction
+///
+/// Controls string length limits, encoding selection, section filtering, and noise
+/// filtering. `validate()` checks the length minimums and the confidence threshold.
+///
+/// # Example
+///
+/// ```rust
+/// use stringy::extraction::ExtractionConfig;
+///
+/// // Use default configuration
+/// let config = ExtractionConfig::default();
+///
+/// // Customize configuration
+/// let mut config = ExtractionConfig::default();
+/// config.min_length = 8;
+/// config.max_length = 2048;
+/// config.scan_code_sections = false;
+/// config.noise_filtering_enabled = true;
+/// config.min_confidence_threshold = 0.6;
+/// ```
+#[derive(Debug, Clone)]
+pub struct ExtractionConfig {
+    /// Minimum string length in bytes (default: 4)
+    pub min_length: usize,
+    /// Maximum string length in bytes (default: 4096)
+    pub max_length: usize,
+    /// Encodings to search for (default: ASCII, UTF-8); see also `enabled_encodings`
+    pub encodings: Vec<Encoding>,
+    /// Whether to scan code sections and sections flagged executable (default: true)
+    pub scan_code_sections: bool,
+    /// Whether to include debug sections (default: false)
+    pub include_debug: bool,
+    /// Section types to scan first, in this order (default: StringData, ReadOnlyData, Resources)
+    pub section_priority: Vec<SectionType>,
+    /// Whether to include import/export names (default: true)
+    pub include_symbols: bool,
+    /// Minimum length for ASCII strings (default: 4); raised to `min_length` if that is larger
+    pub min_ascii_length: usize,
+    /// Minimum length for UTF-16 strings (default: 3, for future use)
+    pub min_wide_length: usize,
+    /// Which encodings to extract (default: ASCII, UTF-8); checked together with `encodings`
+    pub enabled_encodings: Vec<Encoding>,
+    /// Enable/disable noise filtering (default: true)
+    pub noise_filtering_enabled: bool,
+    /// Minimum confidence threshold to include string (default: 0.5)
+    ///
+    /// Strings with confidence below this threshold will be filtered out.
+    pub min_confidence_threshold: f32,
+}
+
+impl Default for ExtractionConfig {
+    fn default() -> Self {
+        Self {
+            min_length: 4, // bytes
+            min_ascii_length: 4, // same as `min_length`
+            min_wide_length: 3, // reserved for future UTF-16 extraction
+            max_length: 4096, // bytes
+            encodings: vec![Encoding::Ascii, Encoding::Utf8],
+            enabled_encodings: vec![Encoding::Ascii, Encoding::Utf8],
+            scan_code_sections: true,
+            include_debug: false,
+            include_symbols: true,
+            noise_filtering_enabled: true,
+            min_confidence_threshold: 0.5,
+            section_priority: vec![
+                SectionType::StringData,
+                SectionType::ReadOnlyData,
+                SectionType::Resources,
+            ],
+        }
+    }
+}
+
+impl ExtractionConfig {
+    /// Validate the configuration
+    ///
+    /// Fails with `StringyError::ConfigError` if a minimum length is zero, if
+    /// `max_length < min_length` (no string could satisfy both bounds), or if
+    /// `min_confidence_threshold` is outside `0.0..=1.0` (NaN is rejected too).
+    pub fn validate(&self) -> Result<()> {
+        let reject = |message: &str| -> Result<()> {
+            Err(crate::types::StringyError::ConfigError(message.to_string()))
+        };
+        if self.min_length == 0 {
+            return reject("min_length must be greater than 0");
+        }
+        if self.min_ascii_length == 0 {
+            return reject("min_ascii_length must be greater than 0");
+        }
+        if self.min_wide_length == 0 {
+            return reject("min_wide_length must be greater than 0");
+        }
+        if self.max_length < self.min_length {
+            return reject("max_length must be greater than or equal to min_length");
+        }
+        if !(0.0..=1.0).contains(&self.min_confidence_threshold) {
+            return reject("min_confidence_threshold must be between 0.0 and 1.0");
+        }
+        Ok(())
+    }
+}
+
+/// Trait for extracting strings from binary data
+///
+/// Implementations of this trait provide different strategies for extracting
+/// strings from binary files, ranging from simple sequential scanning to
+/// format-specific extraction algorithms.
+///
+/// # Example
+///
+/// ```rust
+/// use stringy::extraction::{BasicExtractor, ExtractionConfig, StringExtractor};
+/// use stringy::container::{detect_format, create_parser};
+///
+/// let data = std::fs::read("binary_file")?;
+/// let format = detect_format(&data);
+/// let parser = create_parser(format)?;
+/// let container_info = parser.parse(&data)?;
+///
+/// let extractor = BasicExtractor::new();
+/// let config = ExtractionConfig::default();
+/// let strings = extractor.extract(&data, &container_info, &config)?;
+/// ```
+pub trait StringExtractor {
+    /// Extract strings from entire binary using container metadata
+    ///
+    /// This method iterates through all sections in the container and extracts
+    /// strings from each section based on the provided configuration.
+    ///
+    /// # Arguments
+    ///
+    /// * `data` - Raw bytes of the whole binary
+    /// * `container_info` - Container metadata including sections
+    /// * `config` - Extraction configuration
+    ///
+    /// # Returns
+    ///
+    /// Vector of found strings with metadata, or the implementation's error
+    fn extract(
+        &self,
+        data: &[u8],
+        container_info: &ContainerInfo,
+        config: &ExtractionConfig,
+    ) -> Result<Vec<FoundString>>;
+
+    /// Extract strings from a specific section
+    ///
+    /// This method extracts strings from a single section, useful for targeted
+    /// extraction or when working with individual sections.
+    ///
+    /// # Arguments
+    ///
+    /// * `data` - Raw bytes of the whole binary; `BasicExtractor` slices out the section itself
+    /// * `section` - Section metadata
+    /// * `config` - Extraction configuration
+    ///
+    /// # Returns
+    ///
+    /// Vector of found strings from the section, or the implementation's error
+    fn extract_from_section(
+        &self,
+        data: &[u8],
+        section: &SectionInfo,
+        config: &ExtractionConfig,
+    ) -> Result<Vec<FoundString>>;
+}
+
+/// Basic sequential string extractor
+///
+/// Delegates ASCII runs to `ascii::extract_from_section` and finds the remaining,
+/// non-ASCII UTF-8 runs with a sequential scan that accepts printable bytes and
+/// UTF-8 lead/continuation bytes, then validates each run as UTF-8.
+///
+/// # Example
+///
+/// ```rust
+/// use stringy::extraction::{BasicExtractor, ExtractionConfig, StringExtractor};
+/// use stringy::types::{ContainerInfo, SectionInfo, SectionType, BinaryFormat};
+///
+/// let extractor = BasicExtractor::new();
+/// let config = ExtractionConfig::default();
+///
+/// // Minimal section descriptor; `size` may exceed the data, the extractor clamps it
+/// let section = SectionInfo {
+///     name: ".rodata".to_string(),
+///     offset: 0,
+///     size: 100,
+///     rva: Some(0x1000),
+///     section_type: SectionType::StringData,
+///     is_executable: false,
+///     is_writable: false,
+///     weight: 1.0,
+/// };
+///
+/// let container_info = ContainerInfo::new(
+///     BinaryFormat::Elf,
+///     vec![section],
+///     vec![],
+///     vec![],
+///     None,
+/// );
+///
+/// let data = b"Hello World\0Test String\0";
+/// let strings = extractor.extract(data, &container_info, &config)?;
+/// ```
+#[derive(Debug, Clone)]
+pub struct BasicExtractor;
+
+impl BasicExtractor {
+    /// Construct the extractor; `BasicExtractor` is a unit struct, so there is no state to set up
+    pub fn new() -> BasicExtractor {
+        BasicExtractor
+    }
+}
+
+impl Default for BasicExtractor {
+    fn default() -> Self {
+        BasicExtractor // unit struct: the same value `BasicExtractor::new()` returns
+    }
+}
+
+impl StringExtractor for BasicExtractor {
+    /// Extract strings from every eligible section, then append symbol names.
+    ///
+    /// Sections whose type is listed in `config.section_priority` are visited first, in
+    /// list order; all other sections follow, ordered by descending `weight`. Debug
+    /// sections are skipped unless `config.include_debug` is set. Sections typed as
+    /// code or flagged executable are skipped when `config.scan_code_sections` is
+    /// false. With `config.include_symbols`, import and export names are appended
+    /// with offset 0, no RVA, no section, and confidence 1.0.
+    ///
+    /// # Arguments
+    ///
+    /// * `data` - Raw bytes of the whole binary
+    /// * `container_info` - Parsed sections, imports, and exports
+    /// * `config` - Extraction configuration
+    ///
+    /// # Returns
+    ///
+    /// Section strings in visiting order, followed by import names, then export names
+    ///
+    /// # Errors
+    ///
+    /// Propagates the first error returned by `extract_from_section`.
+    fn extract(
+        &self,
+        data: &[u8],
+        container_info: &ContainerInfo,
+        config: &ExtractionConfig,
+    ) -> Result<Vec<FoundString>> {
+        let mut all_strings = Vec::new();
+
+        // Order the sections: listed types first (by list position), then the rest.
+        // `sort_by_key` is stable, so sections with equal keys keep container order.
+        let listed_count = config.section_priority.len();
+        let mut sections: Vec<_> = container_info.sections.iter().collect();
+        sections.sort_by_key(|section| {
+            config
+                .section_priority
+                .iter()
+                .position(|&st| st == section.section_type)
+                .unwrap_or_else(|| {
+                    // Unlisted types sort after every listed one, heavier sections first.
+                    // The float-to-usize cast saturates: negative or NaN weights become 0.
+                    let weight_int = (section.weight * 1000.0) as usize;
+                    listed_count + (10000 - weight_int.min(10000))
+                })
+        });
+
+        for section in sections {
+            // Debug sections are opt-in
+            if section.section_type == SectionType::Debug && !config.include_debug {
+                continue;
+            }
+
+            // Code is recognised by section type or by the executable flag
+            let is_code = section.section_type == SectionType::Code || section.is_executable;
+            if is_code && !config.scan_code_sections {
+                continue;
+            }
+
+            all_strings.extend(self.extract_from_section(data, section, config)?);
+        }
+
+        if config.include_symbols {
+            // Symbol names come from container metadata rather than a file location, so
+            // they get offset 0, no RVA, no section, and are not noise-filtered.
+            let symbol = |name: &str, source: StringSource| FoundString {
+                text: name.to_string(),
+                encoding: Encoding::Utf8,
+                offset: 0,
+                rva: None,
+                section: None,
+                length: name.len() as u32,
+                tags: Vec::new(),
+                score: 0,
+                source,
+                confidence: 1.0,
+            };
+
+            let imports = container_info.imports.iter();
+            all_strings.extend(imports.map(|i| symbol(i.name.as_str(), StringSource::ImportName)));
+            let exports = container_info.exports.iter();
+            all_strings.extend(exports.map(|e| symbol(e.name.as_str(), StringSource::ExportName)));
+        }
+
+        Ok(all_strings)
+    }
+
+    /// Extract ASCII and, when enabled, non-ASCII UTF-8 strings from one section.
+    ///
+    /// The section is located through `section.offset` and `section.size`, clamped
+    /// to the end of `data`; an empty section or one starting past the end of `data`
+    /// yields no strings. ASCII extraction always runs, using the larger of
+    /// `config.min_ascii_length` and `config.min_length`. UTF-8 extraction runs only
+    /// if `Encoding::Utf8` is listed in `config.encodings` or
+    /// `config.enabled_encodings`; it skips pure-ASCII runs and, with noise filtering
+    /// enabled, drops strings scoring below `config.min_confidence_threshold`.
+    ///
+    /// # Arguments
+    ///
+    /// * `data` - Raw bytes of the whole binary, not just this section
+    /// * `section` - Descriptor of the section to scan
+    /// * `config` - Extraction configuration
+    ///
+    /// # Returns
+    ///
+    /// The ASCII extractor's strings, followed by the UTF-8 strings in scan order
+    ///
+    /// # Errors
+    ///
+    /// This implementation never returns an error; `Result` comes from the trait.
+    fn extract_from_section(
+        &self,
+        data: &[u8],
+        section: &SectionInfo,
+        config: &ExtractionConfig,
+    ) -> Result<Vec<FoundString>> {
+        // Early return for zero-sized sections
+        if section.size == 0 {
+            return Ok(Vec::new());
+        }
+
+        // A section that starts past the end of the data has nothing to scan
+        let section_offset = section.offset as usize;
+        if section_offset >= data.len() {
+            return Ok(Vec::new());
+        }
+
+        // Clamp the section end to the data; `saturating_add` absorbs an overflowing size
+        let end_offset = section_offset
+            .saturating_add(section.size as usize)
+            .min(data.len());
+        let section_data = &data[section_offset..end_offset];
+
+        // One filter configuration for both extractors, present only while filtering is on
+        let noise_filter_config = config
+            .noise_filtering_enabled
+            .then(NoiseFilterConfig::default);
+
+        // The ASCII extractor receives the whole file plus the section descriptor
+        let ascii_config = ascii::AsciiExtractionConfig {
+            min_length: config.min_ascii_length.max(config.min_length),
+            max_length: Some(config.max_length),
+        };
+        let mut found_strings = ascii::extract_from_section(
+            section,
+            data,
+            &ascii_config,
+            noise_filter_config.as_ref(),
+            config.noise_filtering_enabled,
+            config.min_confidence_threshold,
+        );
+
+        // UTF-8 counts as enabled if either encoding list names it
+        let utf8_enabled = config.encodings.contains(&Encoding::Utf8)
+            || config.enabled_encodings.contains(&Encoding::Utf8);
+        if !utf8_enabled {
+            return Ok(found_strings);
+        }
+
+        // Scoring inputs shared by every UTF-8 candidate in this section
+        let filter_context = FilterContext::from_section(section);
+        let filter = noise_filter_config.as_ref().map(CompositeNoiseFilter::new);
+
+        // Candidate runs, each as (text, offset within the section, byte length)
+        let raw_strings =
+            extract_ascii_utf8_strings(section_data, config.min_length, config.max_length);
+
+        for (text, relative_offset, length) in raw_strings {
+            // Pure-ASCII runs are left to the ASCII extractor above
+            if text.is_ascii() {
+                continue;
+            }
+
+            // Without a filter every string gets full confidence
+            let confidence = match &filter {
+                Some(noise_filter) => noise_filter.calculate_confidence(&text, &filter_context),
+                None => 1.0,
+            };
+            if config.noise_filtering_enabled && confidence < config.min_confidence_threshold {
+                continue;
+            }
+
+            // The helper reports offsets relative to the start of the section
+            let relative_offset = relative_offset as u64;
+            found_strings.push(FoundString {
+                text,
+                encoding: Encoding::Utf8,
+                offset: section.offset + relative_offset,
+                // The base RVA is section metadata and may be arbitrarily large;
+                // report no RVA rather than overflow when the sum does not fit in `u64`
+                rva: section
+                    .rva
+                    .and_then(|base_rva| base_rva.checked_add(relative_offset)),
+                section: Some(section.name.clone()),
+                length: length as u32,
+                tags: Vec::new(),
+                score: 0,
+                source: StringSource::SectionData,
+                confidence,
+            });
+        }
+
+        Ok(found_strings)
+    }
+}
+
+/// Check if a byte is printable text (ASCII or common whitespace)
+///
+/// Accepts the printable ASCII range 0x20 (space) through 0x7E (~) together with
+/// three whitespace control characters: tab (0x09), newline (0x0A), and carriage
+/// return (0x0D). Every other byte, including DEL (0x7F) and all bytes at or above
+/// 0x80, is rejected.
+///
+/// **Note on printable character definitions**: the UTF-8-capable extraction helpers
+/// in this module use this predicate so that formatted text containing tabs and
+/// line breaks stays in one piece. The ASCII-only `is_printable_ascii` function in
+/// `extraction::ascii` is documented as stricter (0x20-0x7E only, no whitespace
+/// control characters).
+///
+/// Running both extractors over the same data may therefore produce different results.
+fn is_printable_text_byte(byte: u8) -> bool {
+    let is_common_whitespace = byte == b'\t' || byte == b'\n' || byte == b'\r';
+    let is_printable_ascii = (b' '..=b'~').contains(&byte);
+    is_common_whitespace || is_printable_ascii
+}
+
+/// Check if a byte could be part of a valid UTF-8 sequence (printable text or multi-byte part)
+fn could_be_utf8_byte(byte: u8) -> bool {
+    match byte {
+        0x80..=0xBF | 0xC2..=0xF4 => true, // continuation byte or multi-byte start byte
+        _ => is_printable_text_byte(byte),
+    }
+}
+
+/// Extract ASCII and UTF-8 strings from byte data
+///
+/// Scans `data` for maximal runs of bytes accepted by `could_be_utf8_byte`; a run
+/// ends at the first rejected byte or at the end of `data`. A finished run is
+/// reported only when its byte length lies within `min_length..=max_length` and the
+/// run as a whole decodes as valid UTF-8. Runs failing either check are dropped
+/// entirely: over-long runs are not truncated, and valid parts of an otherwise
+/// invalid run are not salvaged.
+///
+/// Each run is judged on its own, so a rejected run never affects the runs around
+/// it. Because continuation bytes (0x80-0xBF) are accepted anywhere in a run, binary
+/// data adjacent to real text can make the combined run fail UTF-8 validation.
+///
+/// # Arguments
+///
+/// * `data` - Byte slice to scan
+/// * `min_length` - Minimum string length in bytes
+/// * `max_length` - Maximum string length in bytes
+///
+/// # Returns
+///
+/// Vector of tuples containing (text, relative_offset, length), in ascending offset
+/// order. `relative_offset` is the index in `data` where the run starts and `length`
+/// is the run's size in bytes, not characters.
+fn extract_ascii_utf8_strings(
+    data: &[u8],
+    min_length: usize,
+    max_length: usize,
+) -> Vec<(String, usize, usize)> {
+    let mut strings = Vec::new();
+    // Index at which the run currently being scanned began, if a run is open
+    let mut run_start: Option<usize> = None;
+
+    for (index, &byte) in data.iter().enumerate() {
+        if could_be_utf8_byte(byte) {
+            // The first accepted byte opens a run; later ones leave its start untouched
+            run_start = run_start.or(Some(index));
+        } else if let Some(start) = run_start.take() {
+            // A rejected byte closes the open run; `take` also resets the scanner
+            push_utf8_candidate(&mut strings, data, start, index, min_length, max_length);
+        }
+    }
+
+    // A run still open here extends to the end of the data
+    if let Some(start) = run_start {
+        push_utf8_candidate(&mut strings, data, start, data.len(), min_length, max_length);
+    }
+
+    strings
+}
+
+/// Record the finished run `data[start..end]` if it is an acceptable UTF-8 string
+///
+/// The run is validated in place as a sub-slice of `data`, so bytes are copied only
+/// for runs that are actually kept.
+///
+/// Runs shorter than `min_length` or longer than `max_length` are ignored, as are
+/// runs that are not valid UTF-8 from the first byte to the last.
+///
+/// # Arguments
+///
+/// * `strings` - Output list; receives (text, `start`, byte length) on success
+/// * `data` - Byte slice being scanned
+/// * `start` - Index of the first byte of the run
+/// * `end` - Index one past the last byte of the run
+/// * `min_length` - Minimum accepted run length in bytes
+/// * `max_length` - Maximum accepted run length in bytes
+fn push_utf8_candidate(
+    strings: &mut Vec<(String, usize, usize)>,
+    data: &[u8],
+    start: usize,
+    end: usize,
+    min_length: usize,
+    max_length: usize,
+) {
+    let candidate = &data[start..end];
+    if candidate.len() < min_length || candidate.len() > max_length {
+        return;
+    }
+
+    // One invalid sequence anywhere in the run rejects the whole candidate
+    if let Ok(text) = std::str::from_utf8(candidate) {
+        strings.push((text.to_owned(), start, candidate.len()));
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::types::{BinaryFormat, ExportInfo, ImportInfo, SectionType};
+
+    #[test]
+    fn test_is_printable_text_byte() {
+        // Printable ASCII, including both ends of the 0x20-0x7E range
+        let printable = [b' ', b'A', b'z', b'0', b'9', b'~'];
+        for byte in printable {
+            assert!(is_printable_text_byte(byte));
+        }
+
+        // Common whitespace
+        let whitespace = [b'\t', b'\n', b'\r'];
+        for byte in whitespace {
+            assert!(is_printable_text_byte(byte));
+        }
+
+        // Non-printable: NUL, the last control character below space, DEL, and the
+        // highest byte value
+        let rejected = [0x00, 0x1F, 0x7F, 0xFF];
+        for byte in rejected {
+            assert!(!is_printable_text_byte(byte));
+        }
+    }
+
+    #[test]
+    fn test_extract_ascii_utf8_strings() {
+        // Three NUL-separated ASCII strings, each at least four bytes long
+        let found = extract_ascii_utf8_strings(b"Hello\0World\0Test123", 4, 4096);
+
+        // Compare text and start offset; the third tuple field (length) is not checked
+        let text_and_offset: Vec<(&str, usize)> = found
+            .iter()
+            .map(|(text, offset, _)| (text.as_str(), *offset))
+            .collect();
+        let expected = vec![("Hello", 0), ("World", 6), ("Test123", 12)];
+        assert_eq!(text_and_offset, expected);
+    }
+
+    #[test]
+    fn test_extract_ascii_utf8_strings_utf8() {
+        // Mixed ASCII and multi-byte UTF-8, followed by a NUL-separated ASCII string
+        let input = "Hello 世界\0Test";
+        let found = extract_ascii_utf8_strings(input.as_bytes(), 4, 4096);
+
+        let texts: Vec<&str> = found.iter().map(|entry| entry.0.as_str()).collect();
+        assert_eq!(texts, ["Hello 世界", "Test"]);
+    }
+
+    #[test]
+    fn test_extract_ascii_utf8_strings_min_length() {
+        // "Hi" and "AB" fall short of the four-byte minimum and must be dropped
+        let input = b"Hi\0Test\0AB\0LongString";
+        let found = extract_ascii_utf8_strings(input, 4, 4096);
+
+        let texts: Vec<&str> = found.iter().map(|entry| entry.0.as_str()).collect();
+        assert_eq!(texts, ["Test", "LongString"]);
+    }
+
+    #[test]
+    fn test_extract_ascii_utf8_strings_empty() {
+        // An empty input has no runs to report
+        let no_bytes: &[u8] = &[];
+        let found = extract_ascii_utf8_strings(no_bytes, 4, 4096);
+        assert_eq!(found, vec![]);
+    }
+
+    #[test]
+    fn test_extract_ascii_utf8_strings_binary() {
+        // No byte here is accepted as text, so no run is ever opened
+        let binary: [u8; 5] = [0x00, 0xFF, 0x01, 0x02, 0x03];
+        let found = extract_ascii_utf8_strings(&binary, 4, 4096);
+        assert_eq!(found, vec![]);
+    }
+
+    #[test]
+    fn test_extract_ascii_utf8_strings_at_boundaries() {
+        // The first string starts at offset 0; the last one runs to the end of the
+        // input without a terminating NUL
+        let found = extract_ascii_utf8_strings(b"Start\0Middle\0EndTest", 4, 4096);
+        assert_eq!(found.len(), 3);
+        let (first, last) = (&found[0], &found[2]);
+        assert_eq!((first.0.as_str(), first.1), ("Start", 0));
+        assert_eq!(last.0, "EndTest");
+    }
+
+    #[test]
+    fn test_extract_ascii_utf8_strings_max_length() {
+        // With a ten-byte maximum, the 18-byte "VeryLongStringHere" must be dropped
+        // whole rather than truncated
+        let input = b"Short\0VeryLongStringHere";
+        let found = extract_ascii_utf8_strings(input, 4, 10);
+
+        let texts: Vec<&str> = found.iter().map(|entry| entry.0.as_str()).collect();
+        assert_eq!(texts, ["Short"]);
+    }
+
+    #[test]
+    fn test_extraction_config_default() {
+        let config = ExtractionConfig::default();
+        // Covers every field, including the per-encoding and noise-filtering settings
+        assert_eq!((config.min_length, config.max_length), (4, 4096));
+        assert_eq!((config.min_ascii_length, config.min_wide_length), (4, 3));
+        assert_eq!(config.encodings, vec![Encoding::Ascii, Encoding::Utf8]);
+        assert_eq!(config.enabled_encodings, config.encodings);
+        assert!(config.scan_code_sections && config.include_symbols && !config.include_debug);
+        assert_eq!(config.section_priority.len(), 3);
+        assert!(config.noise_filtering_enabled);
+        assert_eq!(config.min_confidence_threshold, 0.5);
+    }
+
+    #[test]
+    fn test_basic_extractor_extract_from_section() {
+        // Declared size (20) exceeds the 16 bytes of data; the extractor clamps it
+        let rodata = SectionInfo {
+            name: ".rodata".to_string(),
+            offset: 0,
+            size: 20,
+            rva: Some(0x1000),
+            section_type: SectionType::StringData,
+            is_executable: false,
+            is_writable: false,
+            weight: 1.0,
+        };
+
+        let found = BasicExtractor::new()
+            .extract_from_section(b"Hello World\0Test", &rodata, &ExtractionConfig::default())
+            .unwrap();
+        assert_eq!(found.len(), 2);
+
+        // First string: starts at the section base, so offset and RVA are the bases
+        let hello = &found[0];
+        assert_eq!(hello.text, "Hello World");
+        assert_eq!((hello.offset, hello.rva), (0, Some(0x1000)));
+        assert_eq!(hello.section.as_deref(), Some(".rodata"));
+        assert_eq!(hello.encoding, Encoding::Ascii);
+
+        // Second string: 12 bytes in, which shifts both offset and RVA by 0xC
+        let test = &found[1];
+        assert_eq!(test.text, "Test");
+        assert_eq!((test.offset, test.rva), (12, Some(0x100C)));
+    }
+
+    #[test]
+    fn test_basic_extractor_max_length_filtering() {
+        // Lower only the maximum length; every other setting keeps its default
+        let short_only = ExtractionConfig {
+            max_length: 10,
+            ..Default::default()
+        };
+
+        let writable_data = SectionInfo {
+            name: ".data".to_string(),
+            offset: 0,
+            size: 30,
+            rva: None,
+            section_type: SectionType::WritableData,
+            is_executable: false,
+            is_writable: true,
+            weight: 0.5,
+        };
+
+        let input = b"Short\0VeryLongStringHere";
+        let found = BasicExtractor::new()
+            .extract_from_section(input, &writable_data, &short_only)
+            .unwrap();
+
+        // "VeryLongStringHere" exceeds ten bytes, leaving "Short" as the only result
+        let texts: Vec<&str> = found.iter().map(|s| s.text.as_str()).collect();
+        assert_eq!(texts, ["Short"]);
+    }
+
+    #[test]
+    fn test_basic_extractor_section_bounds() {
+        // The section covers exactly "Hello World\0" (bytes 7..19 of the input); the
+        // surrounding "prefix" and "suffix" bytes lie outside it
+        let text_section = SectionInfo {
+            name: ".text".to_string(),
+            offset: 7, // skips "prefix\0"
+            size: 12,  // 11 bytes of text plus the NUL terminator
+            rva: Some(0x2000),
+            section_type: SectionType::Code,
+            is_executable: true,
+            is_writable: false,
+            weight: 0.1,
+        };
+
+        let input = b"prefix\0Hello World\0suffix";
+        let found = BasicExtractor::new()
+            .extract_from_section(input, &text_section, &ExtractionConfig::default())
+            .unwrap();
+
+        assert!(!found.is_empty());
+
+        // The string must be reported at the section's own offset and base RVA
+        let hello_world = found
+            .iter()
+            .find(|s| s.text == "Hello World")
+            .expect("Should find 'Hello World' string");
+        assert_eq!(hello_world.offset, 7);
+        assert_eq!(hello_world.rva, Some(0x2000));
+    }
+
+    #[test]
+    fn test_basic_extractor_empty_section() {
+        // A zero-sized section over empty data must yield no strings
+        let empty = SectionInfo {
+            name: ".empty".to_string(),
+            offset: 0,
+            size: 0,
+            rva: None,
+            section_type: SectionType::Other,
+            is_executable: false,
+            is_writable: false,
+            weight: 0.0,
+        };
+
+        let extractor = BasicExtractor::new();
+        let config = ExtractionConfig::default();
+        let no_data: &[u8] = b"";
+        let found = extractor
+            .extract_from_section(no_data, &empty, &config)
+            .unwrap();
+
+        assert!(found.is_empty());
+    }
+
+    #[test]
+    fn test_basic_extractor_section_out_of_bounds() {
+        // The section starts at offset 1000, far past the end of the 10-byte buffer.
+        let data = b"small data";
+        let bogus_section = SectionInfo {
+            name: ".invalid".to_string(),
+            offset: 1000,
+            size: 100,
+            rva: None,
+            section_type: SectionType::Other,
+            is_executable: false,
+            is_writable: false,
+            weight: 0.0,
+        };
+
+        let basic = BasicExtractor::new();
+        let defaults = ExtractionConfig::default();
+        let found = basic
+            .extract_from_section(data, &bogus_section, &defaults)
+            .unwrap();
+
+        assert!(found.is_empty());
+    }
+
+    #[test]
+    fn test_basic_extractor_utf8_encoding() {
+        // 12 bytes: 6 ASCII bytes followed by two 3-byte CJK characters.
+        let data = "Hello 世界".as_bytes();
+        let rodata = SectionInfo {
+            name: ".rodata".to_string(),
+            offset: 0,
+            size: 20,
+            rva: None,
+            section_type: SectionType::StringData,
+            is_executable: false,
+            is_writable: false,
+            weight: 1.0,
+        };
+
+        let basic = BasicExtractor::new();
+        let defaults = ExtractionConfig::default();
+        let found = basic
+            .extract_from_section(data, &rodata, &defaults)
+            .unwrap();
+
+        // Should extract UTF-8 string "Hello 世界"
+        // Note: ASCII extractor may also extract "Hello " as a prefix, but UTF-8 extractor
+        // will extract the full "Hello 世界" string. We check for the UTF-8 string.
+        let full_matches: Vec<_> = found
+            .iter()
+            .filter(|s| s.encoding == Encoding::Utf8 && s.text == "Hello 世界")
+            .collect();
+        assert_eq!(
+            full_matches.len(),
+            1,
+            "Should find UTF-8 string 'Hello 世界', found {} strings total",
+            found.len()
+        );
+        assert_eq!(full_matches[0].text, "Hello 世界");
+        assert_eq!(full_matches[0].encoding, Encoding::Utf8);
+    }
+
+    #[test]
+    fn test_basic_extractor_encoding_filtering() {
+        let extractor = BasicExtractor::new();
+        // Only allow ASCII, exclude UTF-8
+        let config = ExtractionConfig {
+            encodings: vec![Encoding::Ascii],
+            enabled_encodings: vec![Encoding::Ascii],
+            ..Default::default()
+        };
+
+        let section = SectionInfo {
+            name: ".rodata".to_string(),
+            offset: 0,
+            size: 30,
+            rva: None,
+            section_type: SectionType::StringData,
+            is_executable: false,
+            is_writable: false,
+            weight: 1.0,
+        };
+
+        let data = "Hello\0世界\0Test".as_bytes();
+        let strings = extractor
+            .extract_from_section(data, &section, &config)
+            .unwrap();
+
+        // Assert on the whole result set rather than on an ASCII-only subset, so a
+        // stray UTF-8 result cannot slip through unnoticed.
+        assert!(
+            strings.iter().all(|s| s.encoding == Encoding::Ascii),
+            "Should only find ASCII strings, not UTF-8"
+        );
+        assert_eq!(strings.len(), 2, "Should find 2 ASCII strings");
+        assert!(strings.iter().any(|s| s.text == "Hello"));
+        assert!(strings.iter().any(|s| s.text == "Test"));
+        // UTF-8 string "世界" should be filtered out
+        assert!(!strings.iter().any(|s| s.text.contains("世界")));
+    }
+
+    #[test]
+    fn test_basic_extractor_include_symbols() {
+        // Two imports and two exports whose names must surface as symbol strings.
+        let imports = vec![
+            ImportInfo {
+                name: "printf".to_string(),
+                library: Some("libc.so.6".to_string()),
+                address: Some(0x1000),
+                ordinal: None,
+            },
+            ImportInfo {
+                name: "malloc".to_string(),
+                library: Some("libc.so.6".to_string()),
+                address: Some(0x2000),
+                ordinal: None,
+            },
+        ];
+        let exports = vec![
+            ExportInfo {
+                name: "main".to_string(),
+                address: 0x3000,
+                ordinal: None,
+            },
+            ExportInfo {
+                name: "exported_function".to_string(),
+                address: 0x4000,
+                ordinal: None,
+            },
+        ];
+        let text_section = SectionInfo {
+            name: ".text".to_string(),
+            offset: 0,
+            size: 10,
+            rva: None,
+            section_type: SectionType::Code,
+            is_executable: true,
+            is_writable: false,
+            weight: 0.1,
+        };
+        let container_info = ContainerInfo::new(
+            BinaryFormat::Elf,
+            vec![text_section],
+            imports,
+            exports,
+            None,
+        );
+
+        let basic = BasicExtractor::new();
+        let symbols_on = ExtractionConfig {
+            include_symbols: true,
+            ..Default::default()
+        };
+        let data = b"test data";
+        let found = basic.extract(data, &container_info, &symbols_on).unwrap();
+
+        // Should include import and export names
+        let import_strings: Vec<_> = found
+            .iter()
+            .filter(|s| s.source == StringSource::ImportName)
+            .collect();
+        let export_strings: Vec<_> = found
+            .iter()
+            .filter(|s| s.source == StringSource::ExportName)
+            .collect();
+
+        assert_eq!(import_strings.len(), 2);
+        assert!(import_strings.iter().any(|s| s.text == "printf"));
+        assert!(import_strings.iter().any(|s| s.text == "malloc"));
+        assert_eq!(export_strings.len(), 2);
+        assert!(export_strings.iter().any(|s| s.text == "main"));
+        assert!(export_strings.iter().any(|s| s.text == "exported_function"));
+
+        // Verify import string properties
+        let printf_str = import_strings.iter().find(|s| s.text == "printf").unwrap();
+        assert_eq!(printf_str.encoding, Encoding::Utf8);
+        assert_eq!(printf_str.offset, 0);
+        assert_eq!(printf_str.rva, None);
+        assert_eq!(printf_str.section, None);
+        assert_eq!(printf_str.length, 6);
+
+        // Verify export string properties
+        let main_str = export_strings.iter().find(|s| s.text == "main").unwrap();
+        assert_eq!(main_str.encoding, Encoding::Utf8);
+        assert_eq!(main_str.offset, 0);
+        assert_eq!(main_str.rva, None);
+        assert_eq!(main_str.section, None);
+        assert_eq!(main_str.length, 4);
+    }
+
+    #[test]
+    fn test_basic_extractor_exclude_symbols() {
+        let printf_import = ImportInfo {
+            name: "printf".to_string(),
+            library: Some("libc.so.6".to_string()),
+            address: Some(0x1000),
+            ordinal: None,
+        };
+        let main_export = ExportInfo {
+            name: "main".to_string(),
+            address: 0x3000,
+            ordinal: None,
+        };
+        let text_section = SectionInfo {
+            name: ".text".to_string(),
+            offset: 0,
+            size: 10,
+            rva: None,
+            section_type: SectionType::Code,
+            is_executable: true,
+            is_writable: false,
+            weight: 0.1,
+        };
+        let container_info = ContainerInfo::new(
+            BinaryFormat::Elf,
+            vec![text_section],
+            vec![printf_import],
+            vec![main_export],
+            None,
+        );
+
+        let basic = BasicExtractor::new();
+        let symbols_off = ExtractionConfig {
+            include_symbols: false,
+            ..Default::default()
+        };
+        let data = b"test data";
+        let found = basic.extract(data, &container_info, &symbols_off).unwrap();
+
+        // Should not include import/export names
+        assert!(found.iter().all(|s| s.source != StringSource::ImportName));
+        assert!(found.iter().all(|s| s.source != StringSource::ExportName));
+    }
+
+    #[test]
+    fn test_basic_extractor_section_filtering() {
+        // Layout: "CodeData\0" (.text) | "DebugData\0" (.debug_info) | "RoDataTest" (.rodata)
+        let data = b"CodeData\0DebugData\0RoDataTest";
+        let code = SectionInfo {
+            name: ".text".to_string(),
+            offset: 0,
+            size: 9,
+            rva: None,
+            section_type: SectionType::Code,
+            is_executable: true,
+            is_writable: false,
+            weight: 0.1,
+        };
+
+        let debug = SectionInfo {
+            name: ".debug_info".to_string(),
+            offset: 9,
+            size: 10,
+            rva: None,
+            section_type: SectionType::Debug,
+            is_executable: false,
+            is_writable: false,
+            weight: 0.0,
+        };
+
+        let rodata = SectionInfo {
+            name: ".rodata".to_string(),
+            offset: 19,
+            size: 11,
+            rva: None,
+            section_type: SectionType::StringData,
+            is_executable: false,
+            is_writable: false,
+            weight: 1.0,
+        };
+
+        let container_info = ContainerInfo::new(
+            BinaryFormat::Elf,
+            vec![code, debug, rodata],
+            vec![],
+            vec![],
+            None,
+        );
+
+        let basic = BasicExtractor::new();
+        let data_only = ExtractionConfig {
+            scan_code_sections: false,
+            include_debug: false,
+            ..Default::default()
+        };
+        let found = basic.extract(data, &container_info, &data_only).unwrap();
+
+        // Should only extract from data section, not code or debug
+        assert_eq!(found.len(), 1);
+        assert_eq!(found[0].text, "RoDataTest");
+    }
+}
diff --git a/src/extraction/pe_resources.rs b/src/extraction/pe_resources.rs
index 5318a9f..eba163c 100644
--- a/src/extraction/pe_resources.rs
+++ b/src/extraction/pe_resources.rs
@@ -445,6 +445,7 @@ pub fn extract_version_info_strings(data: &[u8]) -> Vec<FoundString> {
                 tags: vec![Tag::Version, Tag::Resource],
                 score: 0,
                 source: StringSource::ResourceString,
+                confidence: 1.0,
             };
             strings.push(found_string);
         });
@@ -600,6 +601,7 @@ pub fn extract_string_table_strings(data: &[u8]) -> Vec<FoundString> {
                     tags: vec![Tag::Resource],
                     score: 0,
                     source: StringSource::ResourceString,
+                    confidence: 1.0,
                 };
                 strings.push(found_string);
             }
@@ -787,6 +789,7 @@ pub fn extract_manifest_strings(data: &[u8]) -> Vec<FoundString> {
                 tags: vec![Tag::Manifest, Tag::Resource],
                 score: 0,
                 source: StringSource::ResourceString,
+                confidence: 1.0,
             };
             strings.push(found_string);
         }
diff --git a/src/lib.rs b/src/lib.rs
index e12e97c..400cfdb 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -17,6 +17,7 @@
 //!
 //! ```rust
 //! use stringy::container::{detect_format, create_parser};
+//! use stringy::extraction::{BasicExtractor, ExtractionConfig, StringExtractor};
 //!
 //! # fn example() -> stringy::Result<()> {
 //! let data = std::fs::read("binary_file")?;
@@ -27,6 +28,18 @@
 //! println!("Format: {:?}", container_info.format);
 //! println!("Sections: {}", container_info.sections.len());
 //! println!("Imports: {}", container_info.imports.len());
+//!
+//! // Extract strings using the basic extractor
+//! let extractor = BasicExtractor::new();
+//! let config = ExtractionConfig::default();
+//! let strings = extractor.extract(&data, &container_info, &config)?;
+//! println!("Found {} strings", strings.len());
+//!
+//! // Extract ASCII strings directly from the raw bytes (ASCII is the foundational encoding)
+//! use stringy::extraction::{extract_ascii_strings, AsciiExtractionConfig};
+//! let ascii_config = AsciiExtractionConfig::default();
+//! let ascii_strings = extract_ascii_strings(&data, &ascii_config);
+//! println!("Found {} ASCII strings", ascii_strings.len());
 //! # Ok(())
 //! # }
 //! ```
@@ -36,7 +49,8 @@
 //! The library is organized into focused modules:
 //!
 //! - [`container`]: Binary format detection and parsing (✅ Complete)
-//! - [`extraction`]: String extraction algorithms (✅ PE resources complete)
+//! - [`extraction`]: String extraction algorithms (✅ ASCII extraction and PE resources complete)
+//!   - ASCII extraction is the foundational extractor and the reference implementation
 //! - [`classification`]: Semantic analysis and tagging (🚧 Types defined)
 //! - [`output`]: Result formatting (🚧 Interfaces ready)
 //! - [`types`]: Core data structures and error handling (✅ Complete)
@@ -57,3 +71,6 @@ pub use types::{
     ResourceStringEntry, ResourceStringTable, ResourceType, Result, SectionInfo, SectionType,
     StringSource, StringyError, Tag,
 };
+
+// Re-export extraction framework types
+pub use extraction::{AsciiExtractionConfig, BasicExtractor, ExtractionConfig, StringExtractor};
diff --git a/src/types.rs b/src/types.rs
index 5e7209d..bccd80c 100644
--- a/src/types.rs
+++ b/src/types.rs
@@ -243,6 +243,26 @@ pub struct FoundString {
     pub score: i32,
     /// Source of the string (section data, import, etc.)
     pub source: StringSource,
+    /// Confidence score from noise filtering (0.0-1.0)
+    ///
+    /// This represents how confident we are that the string is legitimate vs noise.
+    /// A score of 1.0 indicates maximum confidence (e.g., strings from known-good sources
+    /// like imports, exports, resources). Lower scores indicate potential noise that
+    /// may need filtering. This is separate from the `score` field, which is used for
+    /// final ranking (combining section weight, semantic boosts, and noise penalties).
+    pub confidence: f32,
+}
+
+impl FoundString {
+    /// Returns `true` when `confidence` is at least 0.7 (a NaN confidence yields `false`)
+    pub fn is_high_confidence(&self) -> bool {
+        self.confidence >= 0.7
+    }
+
+    /// Returns `true` when `confidence` is below 0.5 (0.5..0.7 is neither high nor low)
+    pub fn is_low_confidence(&self) -> bool {
+        self.confidence < 0.5
+    }
 }
 
 /// Error types for the stringy library
diff --git a/tests/integration_extraction.rs b/tests/integration_extraction.rs
new file mode 100644
index 0000000..1fd82ba
--- /dev/null
+++ b/tests/integration_extraction.rs
@@ -0,0 +1,510 @@
+use std::fs;
+use stringy::container::{ContainerParser, ElfParser, PeParser};
+use stringy::extraction::{BasicExtractor, ExtractionConfig, StringExtractor};
+use stringy::types::{Encoding, SectionType, StringSource};
+
+fn get_fixture_path(name: &str) -> std::path::PathBuf {
+    // Fixtures live under `<crate root>/tests/fixtures/`.
+    let mut path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+    path.extend(["tests", "fixtures", name]);
+    path
+}
+
+#[test]
+fn test_basic_extractor_ascii_strings() {
+    // Create test data with embedded ASCII strings
+    let data = b"prefix\0Hello\0World\0Test123\0suffix";
+    let rodata = stringy::types::SectionInfo {
+        name: ".rodata".to_string(),
+        offset: 7, // Start after "prefix\0"
+        size: 20,  // Covers exactly "Hello\0World\0Test123\0"
+        rva: Some(0x1000),
+        section_type: SectionType::StringData,
+        is_executable: false,
+        is_writable: false,
+        weight: 1.0,
+    };
+
+    let basic = BasicExtractor::new();
+    let defaults = ExtractionConfig::default();
+    let found = basic
+        .extract_from_section(data, &rodata, &defaults)
+        .unwrap();
+
+    // Exactly the three in-section strings, in buffer order.
+    assert_eq!(found.len(), 3);
+    for (string, expected) in found.iter().zip(["Hello", "World", "Test123"]) {
+        assert_eq!(string.text, expected);
+    }
+    assert_eq!(found[0].encoding, Encoding::Ascii);
+    assert_eq!(found[0].source, StringSource::SectionData);
+}
+
+#[test]
+fn test_basic_extractor_utf8_strings() {
+    // Create test data with UTF-8 strings
+    let data = "prefix\0Hello 世界\0Test 测试\0suffix".as_bytes();
+    let rodata = stringy::types::SectionInfo {
+        name: ".rodata".to_string(),
+        offset: 7,
+        size: 30, // Bytes 7..37: both strings plus a truncated "suffi"
+        rva: Some(0x1000),
+        section_type: SectionType::StringData,
+        is_executable: false,
+        is_writable: false,
+        weight: 1.0,
+    };
+
+    let basic = BasicExtractor::new();
+    let defaults = ExtractionConfig::default();
+    let found = basic
+        .extract_from_section(data, &rodata, &defaults)
+        .unwrap();
+
+    // Should extract UTF-8 strings "Hello 世界" and "Test 测试"
+    // Note: ASCII extractor may also extract ASCII prefixes, but UTF-8 extractor
+    // will extract the full UTF-8 strings. We check for the UTF-8 strings.
+    let utf8_found: Vec<_> = found
+        .iter()
+        .filter(|s| s.encoding == Encoding::Utf8)
+        .collect();
+    assert!(
+        utf8_found.len() >= 2,
+        "Should find at least 2 UTF-8 strings, found {} UTF-8 strings ({} total)",
+        utf8_found.len(),
+        found.len()
+    );
+    // Both multi-byte strings must be reported in full.
+    assert!(utf8_found.iter().any(|s| s.text == "Hello 世界"));
+    assert!(utf8_found.iter().any(|s| s.text == "Test 测试"));
+}
+
+#[test]
+fn test_basic_extractor_min_length_filtering() {
+    let data = b"Hi\0Test\0AB\0LongString\0OK";
+    let writable = stringy::types::SectionInfo {
+        name: ".data".to_string(),
+        offset: 0,
+        size: data.len() as u64,
+        rva: None,
+        section_type: SectionType::WritableData,
+        is_executable: false,
+        is_writable: true,
+        weight: 0.5,
+    };
+
+    let basic = BasicExtractor::new();
+    let min_four = ExtractionConfig {
+        min_length: 4,
+        ..Default::default()
+    };
+    let found = basic
+        .extract_from_section(data, &writable, &min_four)
+        .unwrap();
+
+    // Should only find strings >= 4 characters
+    assert!(found.iter().all(|s| s.text.len() >= 4));
+    assert!(found.iter().any(|s| s.text == "Test"));
+    assert!(found.iter().any(|s| s.text == "LongString"));
+    // "Hi" and "AB" should be filtered out
+    for too_short in ["Hi", "AB"] {
+        assert!(found.iter().all(|s| s.text != too_short));
+    }
+}
+
+#[test]
+fn test_basic_extractor_max_length_filtering() {
+    // Create a very long string
+    let oversized = "A".repeat(5000);
+    let data = format!("Short\0{}\0EndTest", oversized).into_bytes();
+
+    let writable = stringy::types::SectionInfo {
+        name: ".data".to_string(),
+        offset: 0,
+        size: data.len() as u64,
+        rva: None,
+        section_type: SectionType::WritableData,
+        is_executable: false,
+        is_writable: true,
+        weight: 0.5,
+    };
+
+    let basic = BasicExtractor::new();
+    let defaults = ExtractionConfig::default(); // max_length = 4096 by default
+    let found = basic
+        .extract_from_section(&data, &writable, &defaults)
+        .unwrap();
+
+    // The long string should be filtered out by max_length
+    assert!(found.iter().any(|s| s.text == "Short"));
+    assert!(found.iter().any(|s| s.text == "EndTest"));
+    // The 5000-character string should not be present
+    assert!(found.iter().all(|s| s.text.len() <= 4096));
+}
+
+#[test]
+fn test_basic_extractor_with_elf_fixture() {
+    let fixture_path = get_fixture_path("test_binary_elf");
+    let elf_data = fs::read(&fixture_path)
+        .expect("Failed to read ELF fixture. Run the build script to generate fixtures.");
+
+    // Parse with ElfParser to get ContainerInfo
+    let parser = ElfParser::new();
+    let container_info = parser.parse(&elf_data).expect("Failed to parse ELF");
+
+    // Use BasicExtractor with config that excludes symbols to focus on section data
+    let extractor = BasicExtractor::new();
+    let config = ExtractionConfig {
+        include_symbols: false,
+        ..Default::default()
+    };
+    let strings = extractor
+        .extract(&elf_data, &container_info, &config)
+        .expect("Failed to extract strings");
+
+    // Verify strings are found
+    assert!(
+        !strings.is_empty(),
+        "Should find some strings in ELF binary"
+    );
+
+    // Verify strings are from appropriate sections
+    for string in &strings {
+        assert_eq!(string.source, StringSource::SectionData);
+        assert!(string.section.is_some());
+        assert!(string.length > 0);
+
+        // Verify encoding is ASCII or UTF-8
+        assert!(
+            matches!(string.encoding, Encoding::Ascii | Encoding::Utf8),
+            "Encoding should be ASCII or UTF-8"
+        );
+
+        // Verify RVA is calculated if section has RVA
+        if let Some(section_name) = &string.section
+            && let Some(section) = container_info
+                .sections
+                .iter()
+                .find(|s| s.name == *section_name)
+            && section.rva.is_some()
+        {
+            assert!(
+                string.rva.is_some(),
+                "RVA should be calculated if section has RVA"
+            );
+        }
+    }
+
+    // Informational only: print each section that yielded strings once (sorted, deduplicated)
+    let section_names: std::collections::BTreeSet<&str> = strings
+        .iter()
+        .filter_map(|s| s.section.as_deref())
+        .collect();
+    println!("Found strings in sections: {:?}", section_names);
+}
+
+#[test]
+fn test_basic_extractor_with_pe_fixture() {
+    let image = fs::read(get_fixture_path("test_binary_pe.exe"))
+        .expect("Failed to read PE fixture. Run the build script to generate fixtures.");
+
+    // Parse with PeParser to get ContainerInfo
+    let parser = PeParser::new();
+    let container_info = parser.parse(&image).expect("Failed to parse PE");
+
+    // Extract strings using BasicExtractor with config that excludes symbols
+    let sections_only = ExtractionConfig {
+        include_symbols: false,
+        ..Default::default()
+    };
+    let basic = BasicExtractor::new();
+    let found = basic
+        .extract(&image, &container_info, &sections_only)
+        .expect("Failed to extract strings");
+
+    // Verify strings are found
+    assert!(!found.is_empty(), "Should find some strings in PE binary");
+
+    // Verify all FoundString fields are properly populated
+    let image_len = image.len() as u64;
+    for string in &found {
+        assert!(!string.text.is_empty());
+        assert_eq!(string.source, StringSource::SectionData);
+        assert!(string.section.is_some());
+        assert!(string.length > 0);
+        assert!(matches!(string.encoding, Encoding::Ascii | Encoding::Utf8));
+
+        // Verify offset is within data bounds
+        assert!(
+            string.offset < image_len,
+            "Offset should be within data bounds"
+        );
+    }
+
+    // Check for strings in common PE sections
+    // (reported via println! only; nothing is asserted on the result)
+    let has_rdata = found.iter().any(|s| {
+        s.section
+            .as_deref()
+            .is_some_and(|name| name.contains(".rdata") || name.contains(".data"))
+    });
+    println!("Found strings in .rdata/.data sections: {}", has_rdata);
+}
+
+#[test]
+fn test_basic_extractor_section_filtering() {
+    let elf_image = fs::read(get_fixture_path("test_binary_elf"))
+        .expect("Failed to read ELF fixture. Run the build script to generate fixtures.");
+
+    let parser = ElfParser::new();
+    let container_info = parser.parse(&elf_image).expect("Failed to parse ELF");
+
+    // Create config that excludes code and debug sections
+    let data_only = ExtractionConfig {
+        scan_code_sections: false,
+        include_debug: false,
+        ..Default::default()
+    };
+
+    let basic = BasicExtractor::new();
+    let found = basic
+        .extract(&elf_image, &container_info, &data_only)
+        .expect("Failed to extract strings");
+
+    // Verify no strings from code or debug sections
+    for string in &found {
+        // Strings with no section, or a name matching no parsed section, are skipped.
+        if let Some(section_name) = &string.section
+            && let Some(section) = container_info
+                .sections
+                .iter()
+                .find(|s| s.name == *section_name)
+        {
+            assert_ne!(
+                section.section_type,
+                SectionType::Code,
+                "Should not extract from code sections"
+            );
+            assert_ne!(
+                section.section_type,
+                SectionType::Debug,
+                "Should not extract from debug sections"
+            );
+        }
+    }
+}
+
+#[test]
+fn test_basic_extractor_empty_data() {
+    // Zero-length buffer paired with a zero-sized section.
+    let no_bytes = b"";
+    let empty_section = stringy::types::SectionInfo {
+        name: ".empty".to_string(),
+        offset: 0,
+        size: 0,
+        rva: None,
+        section_type: SectionType::Other,
+        is_executable: false,
+        is_writable: false,
+        weight: 0.0,
+    };
+
+    let basic = BasicExtractor::new();
+    let defaults = ExtractionConfig::default();
+    let found = basic
+        .extract_from_section(no_bytes, &empty_section, &defaults)
+        .unwrap();
+
+    // Should return empty result, not panic
+    assert!(found.is_empty());
+}
+
+#[test]
+fn test_basic_extractor_boundary_conditions() {
+    let basic = BasicExtractor::new();
+    let defaults = ExtractionConfig::default();
+
+    // Test string at start of section
+    let leading = b"Start\0middle\0end";
+    let leading_section = stringy::types::SectionInfo {
+        name: ".test1".to_string(),
+        offset: 0,
+        size: leading.len() as u64,
+        rva: Some(0x1000),
+        section_type: SectionType::StringData,
+        is_executable: false,
+        is_writable: false,
+        weight: 1.0,
+    };
+    let leading_found = basic
+        .extract_from_section(leading, &leading_section, &defaults)
+        .unwrap();
+    assert!(leading_found.iter().any(|s| s.text == "Start" && s.offset == 0));
+
+    // Test string at end of section
+    let trailing = b"prefix\0middle\0EndTest";
+    let trailing_section = stringy::types::SectionInfo {
+        name: ".test2".to_string(),
+        offset: 0,
+        size: trailing.len() as u64,
+        rva: Some(0x2000),
+        section_type: SectionType::StringData,
+        is_executable: false,
+        is_writable: false,
+        weight: 1.0,
+    };
+    let trailing_found = basic
+        .extract_from_section(trailing, &trailing_section, &defaults)
+        .unwrap();
+    assert!(trailing_found.iter().any(|s| s.text == "EndTest"));
+
+    // Test string spanning entire section
+    let whole = b"FullSectionString";
+    let whole_section = stringy::types::SectionInfo {
+        name: ".test3".to_string(),
+        offset: 0,
+        size: whole.len() as u64,
+        rva: Some(0x3000),
+        section_type: SectionType::StringData,
+        is_executable: false,
+        is_writable: false,
+        weight: 1.0,
+    };
+    let whole_found = basic
+        .extract_from_section(whole, &whole_section, &defaults)
+        .unwrap();
+    assert_eq!(whole_found.len(), 1);
+    assert_eq!(whole_found[0].text, "FullSectionString");
+    assert_eq!(whole_found[0].offset, 0);
+    assert_eq!(whole_found[0].rva, Some(0x3000));
+}
+
+#[test]
+fn test_extraction_config_defaults() {
+    let defaults = ExtractionConfig::default();
+
+    // Verify all default values match specification
+    assert_eq!((defaults.min_length, defaults.max_length), (4, 4096));
+    assert!(defaults.scan_code_sections);
+    assert!(!defaults.include_debug);
+    assert!(defaults.include_symbols);
+
+    assert_eq!(defaults.encodings.len(), 2);
+    assert!(defaults.encodings.contains(&Encoding::Ascii));
+    assert!(defaults.encodings.contains(&Encoding::Utf8));
+    assert_eq!(defaults.section_priority.len(), 3);
+    assert!(defaults.section_priority.contains(&SectionType::StringData));
+    assert!(defaults.section_priority.contains(&SectionType::ReadOnlyData));
+    assert!(defaults.section_priority.contains(&SectionType::Resources));
+}
+
+#[test]
+fn test_basic_extractor_encoding_filtering() {
+    let data = "Hello\0世界\0Test".as_bytes();
+    let rodata = stringy::types::SectionInfo {
+        name: ".rodata".to_string(),
+        offset: 0,
+        size: 30,
+        rva: None,
+        section_type: SectionType::StringData,
+        is_executable: false,
+        is_writable: false,
+        weight: 1.0,
+    };
+
+    // Only allow ASCII, exclude UTF-8
+    let ascii_only = ExtractionConfig {
+        encodings: vec![Encoding::Ascii],
+        enabled_encodings: vec![Encoding::Ascii],
+        ..Default::default()
+    };
+
+    let basic = BasicExtractor::new();
+    let found = basic
+        .extract_from_section(data, &rodata, &ascii_only)
+        .unwrap();
+
+    // Should only find ASCII strings, not UTF-8
+    assert_eq!(found.len(), 2);
+    for (string, expected) in found.iter().zip(["Hello", "Test"]) {
+        assert_eq!(string.text, expected);
+        assert_eq!(string.encoding, Encoding::Ascii);
+    }
+    // UTF-8 string "世界" should be filtered out
+    assert!(found.iter().all(|s| !s.text.contains("世界")));
+}
+
+#[test]
+fn test_basic_extractor_include_symbols() {
+    let fixture = get_fixture_path("test_binary_elf");
+    let elf_image = fs::read(&fixture)
+        .expect("Failed to read ELF fixture. Run the build script to generate fixtures.");
+
+    let parser = ElfParser::new();
+    let container_info = parser.parse(&elf_image).expect("Failed to parse ELF");
+
+    // Extract with symbols included
+    let symbols_on = ExtractionConfig {
+        include_symbols: true,
+        ..Default::default()
+    };
+    let basic = BasicExtractor::new();
+    let found = basic
+        .extract(&elf_image, &container_info, &symbols_on)
+        .expect("Failed to extract strings");
+
+    // Should include import and export names
+    let import_strings: Vec<_> = found
+        .iter()
+        .filter(|s| s.source == StringSource::ImportName)
+        .collect();
+    let export_strings: Vec<_> = found
+        .iter()
+        .filter(|s| s.source == StringSource::ExportName)
+        .collect();
+
+    // Verify we found some imports/exports
+    assert!(!(import_strings.is_empty() && export_strings.is_empty()));
+
+    // Verify import string properties
+    for import in &import_strings {
+        assert_eq!(import.encoding, Encoding::Utf8);
+        assert_eq!(import.offset, 0);
+        assert_eq!(import.rva, None);
+        assert_eq!(import.section, None);
+        assert!(import.length > 0);
+    }
+
+    // Verify export string properties
+    for export in &export_strings {
+        assert_eq!(export.encoding, Encoding::Utf8);
+        assert_eq!(export.offset, 0);
+        assert_eq!(export.rva, None);
+        assert_eq!(export.section, None);
+        assert!(export.length > 0);
+    }
+}
+
+#[test]
+fn test_basic_extractor_exclude_symbols() {
+    let fixture = get_fixture_path("test_binary_elf");
+    let elf_image = fs::read(&fixture)
+        .expect("Failed to read ELF fixture. Run the build script to generate fixtures.");
+
+    let parser = ElfParser::new();
+    let container_info = parser.parse(&elf_image).expect("Failed to parse ELF");
+
+    // Extract with symbols excluded
+    let symbols_off = ExtractionConfig {
+        include_symbols: false,
+        ..Default::default()
+    };
+    let basic = BasicExtractor::new();
+    let found = basic
+        .extract(&elf_image, &container_info, &symbols_off)
+        .expect("Failed to extract strings");
+
+    // Should not include import/export names
+    assert!(found.iter().all(|s| s.source != StringSource::ImportName));
+    assert!(found.iter().all(|s| s.source != StringSource::ExportName));
+}
diff --git a/tests/test_ascii_extraction.rs b/tests/test_ascii_extraction.rs
new file mode 100644
index 0000000..8c6c3b2
--- /dev/null
+++ b/tests/test_ascii_extraction.rs
@@ -0,0 +1,232 @@
+//! Unit tests for ASCII string extraction
+
+use stringy::extraction::ascii::{
+    AsciiExtractionConfig, extract_ascii_strings, extract_from_section,
+};
+use stringy::types::{Encoding, SectionInfo, SectionType, StringSource};
+
+/// Three NUL-separated runs come back; the first one's metadata is checked field by field.
+#[test]
+fn test_basic_extraction() {
+    let found = extract_ascii_strings(b"Hello\0World\0Test123", &AsciiExtractionConfig::default());
+    assert_eq!(found.len(), 3);
+
+    let first = &found[0];
+    assert_eq!(first.text, "Hello");
+    assert_eq!(first.offset, 0);
+    assert_eq!(first.encoding, Encoding::Ascii);
+    assert_eq!(first.source, StringSource::SectionData);
+    assert_eq!(first.confidence, 1.0);
+}
+
+/// `AsciiExtractionConfig::new(4)` drops the two-character runs "Hi" and "AB".
+#[test]
+fn test_minimum_length_threshold() {
+    let min_four = AsciiExtractionConfig::new(4);
+    let found = extract_ascii_strings(b"Hi\0Test\0AB\0LongString", &min_four);
+
+    // Compare the surviving texts in order, which also pins the count at two.
+    let texts: Vec<&str> = found.iter().map(|s| s.text.as_str()).collect();
+    assert_eq!(texts, ["Test", "LongString"]);
+}
+
+/// NUL bytes terminate each run, and the final run needs no terminator at all.
+#[test]
+fn test_null_terminated_strings() {
+    let input = b"First\0Second\0Third";
+    let found = extract_ascii_strings(input, &AsciiExtractionConfig::default());
+
+    assert_eq!(found.len(), 3);
+    for (entry, expected) in found.iter().zip(["First", "Second", "Third"]) {
+        assert_eq!(entry.text, expected);
+    }
+}
+
+/// Both `\x00` and the control byte `\x01` end a run, so the input splits into three
+/// strings.
+#[test]
+fn test_mixed_printable_nonprintable() {
+    let default_config = AsciiExtractionConfig::default();
+    let found = extract_ascii_strings(b"Hello\x00World\x01Test", &default_config);
+
+    // One ordered comparison covers both the count and each extracted text.
+    let texts: Vec<&str> = found.iter().map(|s| s.text.as_str()).collect();
+    assert_eq!(texts, ["Hello", "World", "Test"]);
+}
+
+/// An empty byte slice produces no strings.
+#[test]
+fn test_empty_input() {
+    let nothing: &[u8] = b"";
+    let found = extract_ascii_strings(nothing, &AsciiExtractionConfig::default());
+
+    assert!(found.is_empty());
+}
+
+/// NUL, `0xFF` and low control bytes contain no printable run, so nothing is extracted.
+#[test]
+fn test_no_valid_strings() {
+    let unprintable: [u8; 5] = [0x00, 0xFF, 0x01, 0x02, 0x03];
+    let found = extract_ascii_strings(&unprintable, &AsciiExtractionConfig::default());
+
+    assert!(found.is_empty());
+}
+
+/// The string found in a 12-byte section at file offset 7 inherits its offset, RVA and name.
+#[test]
+fn test_string_at_section_boundary() {
+    let rodata = SectionInfo {
+        name: ".rodata".to_string(),
+        offset: 7,
+        size: 12,
+        rva: Some(0x2000),
+        section_type: SectionType::StringData,
+        is_executable: false,
+        is_writable: false,
+        weight: 1.0,
+    };
+    let file_bytes = b"prefix\0Hello World\0suffix";
+    let ascii = AsciiExtractionConfig::default();
+
+    // Trailing arguments: no noise config, filtering disabled, confidence threshold 0.5.
+    let found = extract_from_section(&rodata, file_bytes, &ascii, None, false, 0.5);
+    assert!(!found.is_empty());
+    let hit = found.iter().find(|s| s.text == "Hello World");
+    assert!(hit.is_some());
+    let hit = hit.unwrap();
+    assert_eq!(hit.offset, 7);
+    assert_eq!(hit.rva, Some(0x2000));
+    assert_eq!(hit.section, Some(".rodata".to_string()));
+}
+
+/// A 500-character run exceeds `max_length: Some(200)` and is not returned; "Short" is.
+#[test]
+fn test_very_long_string() {
+    let mut input = vec![b'A'; 500];
+    input.extend_from_slice(b"\0Short");
+    let capped = AsciiExtractionConfig {
+        max_length: Some(200),
+        ..Default::default()
+    };
+    let found = extract_ascii_strings(&input, &capped);
+    assert_eq!(found.len(), 1);
+    assert_eq!(found[0].text, "Short");
+}
+
+/// One-byte runs ("A", "B", "C") fall below the default minimum; only "Test" is returned.
+#[test]
+fn test_single_character_sequences() {
+    let defaults = AsciiExtractionConfig::default();
+    let found = extract_ascii_strings(b"A\0Test\0B\0C", &defaults);
+
+    let texts: Vec<&str> = found.iter().map(|s| s.text.as_str()).collect();
+    assert_eq!(texts, ["Test"]);
+}
+
+/// The same bytes are extracted once per section; each result set must be tagged with
+/// the name of the section it was extracted through.
+#[test]
+fn test_different_section_types() {
+    // Read-only string data with full weight.
+    let read_only = SectionInfo {
+        name: ".rodata".to_string(),
+        offset: 0,
+        size: 20,
+        rva: Some(0x1000),
+        section_type: SectionType::StringData,
+        is_executable: false,
+        is_writable: false,
+        weight: 1.0,
+    };
+
+    // Writable data with half the weight.
+    let writable = SectionInfo {
+        name: ".data".to_string(),
+        offset: 0,
+        size: 20,
+        rva: Some(0x2000),
+        section_type: SectionType::WritableData,
+        is_executable: false,
+        is_writable: true,
+        weight: 0.5,
+    };
+
+    let bytes = b"Hello World\0Test";
+    let ascii = AsciiExtractionConfig::default();
+
+    // Identical extraction for both sections: two strings, each carrying the section name.
+    for section in [&read_only, &writable] {
+        let found = extract_from_section(section, bytes, &ascii, None, false, 0.5);
+        assert_eq!(found.len(), 2);
+
+        for entry in &found {
+            assert_eq!(entry.section, Some(section.name.clone()));
+        }
+    }
+}
+
+/// Every string extracted through `.custom` carries that name and an RVA of at least 0x3000.
+#[test]
+fn test_section_metadata_attachment() {
+    let section = SectionInfo {
+        name: ".custom".to_string(),
+        offset: 0,
+        size: 20,
+        rva: Some(0x3000),
+        section_type: SectionType::ReadOnlyData,
+        is_executable: false,
+        is_writable: false,
+        weight: 0.8,
+    };
+    let data = b"Test String\0Another";
+    let config = AsciiExtractionConfig::default();
+    let strings = extract_from_section(&section, data, &config, None, false, 0.5);
+    // Guard against a vacuous pass: the per-string checks below need something to inspect.
+    assert!(!strings.is_empty(), "Should extract some strings");
+    for string in &strings {
+        assert_eq!(string.section, Some(".custom".to_string()));
+        assert!(matches!(string.rva, Some(rva) if rva >= 0x3000));
+    }
+}
+
+/// With `AsciiExtractionConfig::new(5)`, "Test" and "AB" are dropped; only "Hello" survives.
+#[test]
+fn test_custom_minimum_length() {
+    let min_five = AsciiExtractionConfig::new(5);
+    let found = extract_ascii_strings(b"Test\0Hello\0AB", &min_five);
+
+    assert_eq!(found.len(), 1);
+    assert_eq!(found[0].text, "Hello");
+}
+
+/// Plain `extract_ascii_strings` output: even the repetitive "AAAA" run is returned, and
+/// every result reports a confidence of exactly 1.0.
+#[test]
+fn test_noise_filtering_disabled() {
+    let input = b"Hello\0AAAA\0World";
+    let found = extract_ascii_strings(input, &AsciiExtractionConfig::default());
+
+    // Nothing is dropped ...
+    assert_eq!(found.len(), 3);
+    // ... and nothing is scored below the maximum.
+    for entry in &found {
+        assert_eq!(entry.confidence, 1.0);
+    }
+}
+
+/// A struct-literal config (min 8, max 50) keeps the 18- and 12-character strings and
+/// rejects the 5-character "Short".
+#[test]
+fn test_configuration_customization() {
+    let bounded = AsciiExtractionConfig {
+        min_length: 8,
+        max_length: Some(50),
+    };
+    let found = extract_ascii_strings(b"Short\0VeryLongStringHere\0MediumLength", &bounded);
+    assert_eq!(found.len(), 2);
+
+    // Membership is checked without assuming the order of the results.
+    for expected in ["VeryLongStringHere", "MediumLength"] {
+        assert!(found.iter().any(|s| s.text == expected));
+    }
+}
diff --git a/tests/test_ascii_integration.rs b/tests/test_ascii_integration.rs
new file mode 100644
index 0000000..c227d81
--- /dev/null
+++ b/tests/test_ascii_integration.rs
@@ -0,0 +1,430 @@
+//! Integration tests for ASCII extraction with noise filtering
+
+use insta::assert_snapshot;
+use std::fs;
+use stringy::container::{ContainerParser, PeParser};
+use stringy::extraction::ascii::{
+    AsciiExtractionConfig, extract_ascii_strings, extract_from_section,
+};
+use stringy::extraction::config::NoiseFilterConfig;
+use stringy::extraction::filters::{CompositeNoiseFilter, FilterContext};
+use stringy::extraction::{BasicExtractor, ExtractionConfig, StringExtractor};
+use stringy::types::{BinaryFormat, ContainerInfo, SectionInfo, SectionType};
+
+/// Resolves `name` to `<CARGO_MANIFEST_DIR>/tests/fixtures/<name>`.
+fn get_fixture_path(name: &str) -> std::path::PathBuf {
+    let mut path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+    path.extend(["tests", "fixtures", name]);
+    path
+}
+
+/// Scans each non-empty section of the PE fixture; expects strings with confidence in [0, 1].
+#[test]
+#[ignore] // Requires test_binary_pe.exe fixture
+fn test_ascii_extraction_from_binary() {
+    let fixture_path = get_fixture_path("test_binary_pe.exe");
+    if !fixture_path.exists() {
+        return;
+    }
+
+    let pe_data = fs::read(&fixture_path).expect("Failed to read PE fixture");
+    let parser = PeParser::new();
+    let container_info = parser.parse(&pe_data).expect("Failed to parse PE");
+
+    let config = AsciiExtractionConfig::default();
+    let mut all_strings = Vec::new();
+    for section in container_info.sections.iter().filter(|s| s.size > 0) {
+        // Resolve the range with checked arithmetic and `get`, so a section header that
+        // points past the end of the file yields no data instead of a slicing panic.
+        let start = section.offset as usize;
+        let section_data = start
+            .checked_add(section.size as usize)
+            .and_then(|end| pe_data.get(start..end))
+            .unwrap_or(&[]);
+        all_strings.extend(extract_ascii_strings(section_data, &config));
+    }
+
+    // Verify that legitimate strings are extracted
+    assert!(
+        !all_strings.is_empty(),
+        "Should extract some strings from binary"
+    );
+
+    // Verify all strings have confidence set
+    for string in &all_strings {
+        assert!(string.confidence >= 0.0 && string.confidence <= 1.0);
+    }
+}
+
+/// Known noise runs must score below 0.5 when passed through the composite noise filter.
+#[test]
+fn test_false_positive_reduction() {
+    // Create test data with known noise patterns
+    let noise_data = b"AAAA\x00\x00\x00\x00!!!@@@###\0Hello World\0Test123";
+    let config = AsciiExtractionConfig::default();
+    let strings = extract_ascii_strings(noise_data, &config);
+
+    // Apply noise filtering
+    let filter_config = NoiseFilterConfig::default();
+    let filter = CompositeNoiseFilter::new(&filter_config);
+    let context = FilterContext::default();
+
+    // Pick out the two noise runs planted in the input.
+    let noise_strings: Vec<_> = strings
+        .iter()
+        .filter(|s| s.text == "AAAA" || s.text == "!!!@@@###")
+        .collect();
+
+    // Without this guard the loop below would pass vacuously if extraction ever stopped
+    // returning the noise runs, and the test would no longer exercise the filter at all.
+    assert!(
+        !noise_strings.is_empty(),
+        "Extraction should surface the planted noise strings"
+    );
+
+    for noise_string in noise_strings {
+        let confidence = filter.calculate_confidence(&noise_string.text, &context);
+        assert!(
+            confidence < 0.5,
+            "Noise string '{}' should have low confidence: {}",
+            noise_string.text,
+            confidence
+        );
+    }
+}
+
+/// More than 95% of the legitimate sample strings must score at least 0.5.
+#[test]
+fn test_true_positive_retention() {
+    // Create test data with known legitimate strings
+    let legitimate_data =
+        b"Hello World\0Error: file not found\0C:\\Windows\\System32\0https://example.com";
+    let config = AsciiExtractionConfig::default();
+    let strings = extract_ascii_strings(legitimate_data, &config);
+
+    // Fail clearly on an empty result instead of computing 0/0 = NaN for the rate below.
+    assert!(!strings.is_empty(), "Should extract the legitimate strings");
+
+    // Apply noise filtering
+    let filter_config = NoiseFilterConfig::default();
+    let filter = CompositeNoiseFilter::new(&filter_config);
+    let context = FilterContext::default();
+    let retained_count = strings
+        .iter()
+        .filter(|s| filter.calculate_confidence(&s.text, &context) >= 0.5)
+        .count();
+
+    // Verify that legitimate strings are retained (target: >95%)
+    let retention_rate = retained_count as f32 / strings.len() as f32;
+    assert!(
+        retention_rate > 0.95,
+        "True positive retention rate should be >95%, got {}%",
+        retention_rate * 100.0
+    );
+}
+
+/// Times extraction and confidence scoring separately on 1000 repetitions of a small input.
+#[test]
+fn test_performance_overhead() {
+    // Build the input: a 39-byte pattern of three NUL-terminated strings, repeated 1000 times
+    let test_data = b"Hello World\0Test String\0Another String\0".repeat(1000);
+    let config = AsciiExtractionConfig::default();
+
+    // Time extraction on its own (no confidence scoring involved)
+    let start = std::time::Instant::now();
+    let strings = extract_ascii_strings(&test_data, &config);
+    let extraction_time = start.elapsed();
+
+    // Build the filter before the timer is restarted, so only scoring is measured below
+    let filter_config = NoiseFilterConfig::default();
+    let filter = CompositeNoiseFilter::new(&filter_config);
+    let context = FilterContext::default();
+
+    let start = std::time::Instant::now();
+    for string in &strings {
+        let _ = filter.calculate_confidence(&string.text, &context);
+    }
+    let filtering_time = start.elapsed();
+
+    // Always checked: extraction plus filtering must finish within one second.
+    // The stricter <10% overhead target is only checked in the cfg-gated block below.
+    // NOTE(review): both checks use wall-clock time, so a loaded machine may cause failures.
+    let total_time = extraction_time + filtering_time;
+    assert!(
+        total_time.as_secs_f64() < 1.0,
+        "Total extraction+filtering time should be <1s, got {:?} (extraction: {:?}, filtering: {:?})",
+        total_time,
+        extraction_time,
+        filtering_time
+    );
+
+    // Without debug assertions (release builds): verify the <10% target; a zero baseline counts as 0.0
+    #[cfg(not(debug_assertions))]
+    {
+        let overhead_ratio = if extraction_time.as_secs_f64() > 0.0 {
+            filtering_time.as_secs_f64() / extraction_time.as_secs_f64()
+        } else {
+            0.0
+        };
+        assert!(
+            overhead_ratio < 0.1,
+            "Filtering overhead should be <10% of extraction time in release mode, got {}%",
+            overhead_ratio * 100.0
+        );
+    }
+}
+
+/// Snapshots one `text:offset:confidence` line per string found in `StringData` sections.
+#[test]
+#[ignore] // Requires test_binary_pe.exe fixture
+fn test_snapshot_extraction() {
+    let fixture_path = get_fixture_path("test_binary_pe.exe");
+    if !fixture_path.exists() {
+        return;
+    }
+
+    let pe_data = fs::read(&fixture_path).expect("Failed to read PE fixture");
+    let parser = PeParser::new();
+    let container_info = parser.parse(&pe_data).expect("Failed to parse PE");
+
+    let config = AsciiExtractionConfig::default();
+    let mut all_strings = Vec::new();
+    for section in &container_info.sections {
+        if section.size == 0 || section.section_type != SectionType::StringData {
+            continue;
+        }
+        // Checked range lookup: `pe_data[offset..]` would panic for an offset past the end.
+        let start = section.offset as usize;
+        let section_data = start
+            .checked_add(section.size as usize)
+            .and_then(|end| pe_data.get(start..end))
+            .unwrap_or(&[]);
+        all_strings.extend(extract_ascii_strings(section_data, &config));
+    }
+
+    // Create snapshot of extracted strings
+    let output: String = all_strings
+        .iter()
+        .map(|s| format!("{}:{}:{}\n", s.text, s.offset, s.confidence))
+        .collect();
+
+    assert_snapshot!("ascii_extraction_snapshot", output);
+}
+
+/// A string must never score lower in a weight-1.0 `.rodata` context than in a
+/// weight-0.1 executable `.text` context.
+#[test]
+fn test_section_context_awareness() {
+    // Context expected to be favourable: read-only string data.
+    let rodata = SectionInfo {
+        name: ".rodata".to_string(),
+        offset: 0,
+        size: 20,
+        rva: Some(0x1000),
+        section_type: SectionType::StringData,
+        is_executable: false,
+        is_writable: false,
+        weight: 1.0,
+    };
+
+    // Context expected to be unfavourable: executable code.
+    let code = SectionInfo {
+        name: ".text".to_string(),
+        offset: 0,
+        size: 20,
+        rva: Some(0x2000),
+        section_type: SectionType::Code,
+        is_executable: true,
+        is_writable: false,
+        weight: 0.1,
+    };
+
+    let favourable = FilterContext::from_section(&rodata);
+    let unfavourable = FilterContext::from_section(&code);
+
+    let noise_config = NoiseFilterConfig::default();
+    let scorer = CompositeNoiseFilter::new(&noise_config);
+
+    let found = extract_ascii_strings(b"Hello World\0Test", &AsciiExtractionConfig::default());
+
+    // Score every extracted string under both contexts and compare the pair.
+    for entry in &found {
+        let in_rodata = scorer.calculate_confidence(&entry.text, &favourable);
+        let in_code = scorer.calculate_confidence(&entry.text, &unfavourable);
+
+        assert!(
+            in_rodata >= in_code,
+            "High-weight section should have equal or higher confidence"
+        );
+    }
+}
+
+/// Runs `BasicExtractor` end to end with noise filtering enabled over one `.rodata` section.
+///
+/// Checks that every confidence lies in [0.0, 1.0] and that at least one legitimate string
+/// meets the configured threshold.
+#[test]
+fn test_full_extraction_path_with_filtering() {
+    // Mix of legitimate strings and noise
+    let data = b"Hello World\0AAAA\0Error: file not found\0!!!@@@###\0Test123";
+
+    // The section spans all 56 bytes of `data`. A size of 50 would cover only the first
+    // byte of "Test123", so that string could never be extracted from the section.
+    let section = SectionInfo {
+        name: ".rodata".to_string(),
+        offset: 0,
+        size: 56,
+        rva: Some(0x1000),
+        section_type: SectionType::StringData,
+        is_executable: false,
+        is_writable: false,
+        weight: 1.0,
+    };
+
+    let extractor = BasicExtractor::new();
+    let config = ExtractionConfig {
+        noise_filtering_enabled: true,
+        min_confidence_threshold: 0.5,
+        ..Default::default()
+    };
+
+    // `section` is not used again, so it is moved into the container rather than cloned.
+    let container_info = ContainerInfo::new(BinaryFormat::Elf, vec![section], vec![], vec![], None);
+
+    let strings = extractor.extract(data, &container_info, &config).unwrap();
+
+    // Verify that filtering is applied (confidence scores are computed)
+    assert!(!strings.is_empty(), "Should extract some strings");
+
+    // Verify all strings have confidence scores in valid range
+    for string in &strings {
+        assert!(
+            string.confidence >= 0.0 && string.confidence <= 1.0,
+            "String '{}' should have confidence in [0.0, 1.0], got {}",
+            string.text,
+            string.confidence
+        );
+    }
+
+    // Verify that strings with confidence >= threshold are retained
+    let retained_strings: Vec<_> = strings
+        .iter()
+        .filter(|s| s.confidence >= config.min_confidence_threshold)
+        .collect();
+
+    assert!(
+        !retained_strings.is_empty(),
+        "Should retain at least some strings with confidence >= threshold"
+    );
+
+    // Verify that legitimate strings are likely to be retained
+    let legitimate_strings: Vec<_> = strings
+        .iter()
+        .filter(|s| {
+            s.text == "Hello World" || s.text == "Error: file not found" || s.text == "Test123"
+        })
+        .collect();
+
+    // At least some legitimate strings should be retained
+    let retained_legitimate: Vec<_> = legitimate_strings
+        .iter()
+        .filter(|s| s.confidence >= config.min_confidence_threshold)
+        .collect();
+
+    // The count is always zero when this assertion fails, so the message omits it.
+    assert!(
+        !retained_legitimate.is_empty(),
+        "At least one legitimate string should be retained"
+    );
+}
+
+/// With `noise_filtering_enabled: false`, all three runs are returned (noise included)
+/// and each one reports a confidence of exactly 1.0.
+#[test]
+fn test_extraction_with_filtering_disabled() {
+    let rodata = SectionInfo {
+        name: ".rodata".to_string(),
+        offset: 0,
+        size: 30,
+        rva: Some(0x1000),
+        section_type: SectionType::StringData,
+        is_executable: false,
+        is_writable: false,
+        weight: 1.0,
+    };
+    let unfiltered = ExtractionConfig {
+        noise_filtering_enabled: false,
+        ..Default::default()
+    };
+    let container = ContainerInfo::new(BinaryFormat::Elf, vec![rodata], vec![], vec![], None);
+
+    let input = b"Hello World\0AAAA\0Test123";
+    let found = BasicExtractor::new()
+        .extract(input, &container, &unfiltered)
+        .unwrap();
+
+    // At least the three runs present in `input` must come back.
+    let count = found.len();
+    assert!(
+        count >= 3,
+        "All strings should be included when filtering is disabled, found {}",
+        count
+    );
+
+    // Every confidence must be exactly 1.0 when filtering is off.
+    for entry in &found {
+        assert_eq!(
+            entry.confidence, 1.0,
+            "String '{}' should have confidence 1.0 when filtering is disabled, got {}",
+            entry.text, entry.confidence
+        );
+    }
+}
+
+/// With a noise config, filtering enabled and a 0.5 threshold, `extract_from_section`
+/// drops "AAAA", keeps ordinary text, and returns only strings scoring at least 0.5.
+#[test]
+fn test_extract_from_section_with_filtering() {
+    let rodata = SectionInfo {
+        name: ".rodata".to_string(),
+        offset: 0,
+        size: 40,
+        rva: Some(0x1000),
+        section_type: SectionType::StringData,
+        is_executable: false,
+        is_writable: false,
+        weight: 1.0,
+    };
+    let input = b"Hello World\0AAAA\0Test123";
+    let ascii = AsciiExtractionConfig::default();
+    let noise = NoiseFilterConfig::default();
+
+    let kept = extract_from_section(
+        &rodata,
+        input,
+        &ascii,
+        Some(&noise),
+        true, // filtering enabled
+        0.5,  // threshold
+    );
+
+    // The repetitive run must be gone ...
+    let noise_left = kept.iter().any(|s| s.text == "AAAA");
+    assert!(!noise_left, "Noise string 'AAAA' should be filtered out");
+
+    // ... while at least one of the ordinary strings is still present.
+    assert!(
+        kept.iter().any(|s| ["Hello World", "Test123"].contains(&s.text.as_str())),
+        "Legitimate strings should be retained"
+    );
+
+    // Whatever survives must meet the requested threshold.
+    for entry in &kept {
+        assert!(
+            entry.confidence >= 0.5,
+            "String '{}' should have confidence >= 0.5, got {}",
+            entry.text,
+            entry.confidence
+        );
+    }
+}
diff --git a/tests/test_noise_filters.rs b/tests/test_noise_filters.rs
new file mode 100644
index 0000000..b829659
--- /dev/null
+++ b/tests/test_noise_filters.rs
@@ -0,0 +1,348 @@
+//! Unit tests for noise filtering heuristics
+
+use stringy::extraction::config::{FilterWeights, NoiseFilterConfig};
+use stringy::extraction::filters::{
+    CharDistributionFilter, CompositeNoiseFilter, ContextFilter, EntropyFilter, FilterContext,
+    LengthFilter, LinguisticFilter, NoiseFilter, RepetitionFilter,
+};
+use stringy::types::SectionType;
+
+/// Punctuation-only input must score below 0.5 under `CharDistributionFilter`.
+#[test]
+fn test_char_distribution_filter_all_punctuation() {
+    let ctx = FilterContext::default();
+    let conf = CharDistributionFilter.calculate_confidence("!!!@@@###$$$", &ctx);
+
+    assert!(conf < 0.5, "All punctuation should have low confidence");
+}
+
+/// A single character repeated four times must score below 0.5.
+#[test]
+fn test_char_distribution_filter_repeated_character() {
+    let ctx = FilterContext::default();
+    let conf = CharDistributionFilter.calculate_confidence("AAAA", &ctx);
+
+    assert!(conf < 0.5, "Repeated character should have low confidence");
+}
+
+/// Ordinary punctuated prose must score above 0.7 under `CharDistributionFilter`.
+#[test]
+fn test_char_distribution_filter_normal_text() {
+    let ctx = FilterContext::default();
+    let conf = CharDistributionFilter.calculate_confidence("Hello, World!", &ctx);
+
+    assert!(conf > 0.7, "Normal text should have high confidence");
+}
+
+/// Letters mixed with digits ("Test123") must score above 0.5.
+#[test]
+fn test_char_distribution_filter_mixed_alphanumeric() {
+    let ctx = FilterContext::default();
+    let conf = CharDistributionFilter.calculate_confidence("Test123", &ctx);
+
+    assert!(
+        conf > 0.5,
+        "Mixed alphanumeric should have reasonable confidence"
+    );
+}
+
+/// A run of one repeated character must score below 0.5 under the entropy filter.
+#[test]
+fn test_entropy_filter_low_entropy() {
+    let entropy = EntropyFilter::new(1.5, 7.5);
+    let ctx = FilterContext::default();
+
+    let conf = entropy.calculate_confidence("AAAA", &ctx);
+    assert!(conf < 0.5, "Low entropy should have low confidence");
+}
+
+/// A string made of dozens of distinct printable characters must score lower than prose.
+///
+/// NOTE(review): the comparison is relative on purpose; whether this input actually
+/// crosses the 7.5 bound passed to `EntropyFilter::new` is not asserted here.
+#[test]
+fn test_entropy_filter_high_entropy() {
+    let entropy = EntropyFilter::new(1.5, 7.5);
+    let ctx = FilterContext::default();
+    let noisy = "!@#$%^&*()_+-=[]{}|;':\",./<>?`~abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+
+    let random_confidence = entropy.calculate_confidence(noisy, &ctx);
+    let normal_confidence = entropy.calculate_confidence("Hello, World!", &ctx);
+    assert!(
+        random_confidence < normal_confidence,
+        "High entropy string should have lower confidence than normal text (random: {}, normal: {})",
+        random_confidence,
+        normal_confidence
+    );
+}
+
+/// Plain prose must score above 0.5 under the entropy filter.
+#[test]
+fn test_entropy_filter_normal_text() {
+    let ctx = FilterContext::default();
+    let conf = EntropyFilter::new(1.5, 7.5).calculate_confidence("Hello, World!", &ctx);
+
+    assert!(conf > 0.5, "Normal text should have reasonable confidence");
+}
+
+/// Contrasts two extremes: a run of NUL characters (must score below 0.5) and an
+/// ordinary error message (must score above 0.5).
+#[test]
+fn test_entropy_filter_edge_cases() {
+    let filter = EntropyFilter::new(1.5, 7.5);
+    let context = FilterContext::default();
+
+    let score1 = filter.calculate_confidence("\x00\x00\x00\x00", &context);
+    assert!(score1 < 0.5, "NUL run should have low confidence");
+    let score2 = filter.calculate_confidence("Error: file not found", &context);
+    assert!(score2 > 0.5, "Error text should score above 0.5");
+}
+
+/// English-like text must score above 0.7 under `LinguisticFilter`.
+#[test]
+fn test_linguistic_filter_english_like() {
+    let ctx = FilterContext::default();
+    let conf = LinguisticFilter::new(0.1, 0.9).calculate_confidence("Hello world", &ctx);
+
+    assert!(conf > 0.7, "English-like text should have high confidence");
+}
+
+/// A long run of consonants must score below 0.7 under `LinguisticFilter`.
+#[test]
+fn test_linguistic_filter_consonant_heavy() {
+    let ctx = FilterContext::default();
+    let linguistic = LinguisticFilter::new(0.1, 0.9);
+    let conf = linguistic.calculate_confidence("bcdfghjklmnpqrstvwxyz", &ctx);
+    assert!(conf < 0.7, "Consonant-heavy should have lower confidence");
+}
+
+/// A vowel-only string must score below 0.7 under `LinguisticFilter`.
+#[test]
+fn test_linguistic_filter_vowel_heavy() {
+    let ctx = FilterContext::default();
+    let conf = LinguisticFilter::new(0.1, 0.9).calculate_confidence("aeiouaeiou", &ctx);
+
+    assert!(conf < 0.7, "Vowel-heavy should have lower confidence");
+}
+
+/// Text that mixes a word with digits ("Error 404") must score above 0.5.
+#[test]
+fn test_linguistic_filter_with_numbers() {
+    let ctx = FilterContext::default();
+    let conf = LinguisticFilter::new(0.1, 0.9).calculate_confidence("Error 404", &ctx);
+
+    assert!(
+        conf > 0.5,
+        "Text with numbers should have reasonable confidence"
+    );
+}
+
+/// A two-character string in a context with section weight 0.3 must score below 0.7.
+#[test]
+fn test_length_filter_very_short() {
+    let low_weight = FilterContext {
+        section_weight: 0.3,
+        ..Default::default()
+    };
+    let conf = LengthFilter::new(200).calculate_confidence("Hi", &low_weight);
+
+    assert!(
+        conf < 0.7,
+        "Very short in low-weight section should have lower confidence"
+    );
+}
+
+/// A five-character string must score above 0.7 under `LengthFilter::new(200)`.
+#[test]
+fn test_length_filter_normal_length() {
+    let ctx = FilterContext::default();
+    let conf = LengthFilter::new(200).calculate_confidence("Hello", &ctx);
+
+    assert!(conf > 0.7, "Normal length should have high confidence");
+}
+
+/// A 300-character string, longer than the 200 passed to `LengthFilter::new`, scores < 0.5.
+#[test]
+fn test_length_filter_very_long() {
+    let ctx = FilterContext::default();
+    let oversized = "A".repeat(300);
+
+    let conf = LengthFilter::new(200).calculate_confidence(&oversized, &ctx);
+    assert!(conf < 0.5, "Very long string should have low confidence");
+}
+
+/// Four identical characters must score below 0.5 under `RepetitionFilter::new(0.7)`.
+#[test]
+fn test_repetition_filter_repeated_characters() {
+    let ctx = FilterContext::default();
+    let value = RepetitionFilter::new(0.7).calculate_confidence("AAAA", &ctx);
+
+    assert!(
+        value < 0.5,
+        "Repeated characters should have low confidence"
+    );
+}
+
+/// A three-letter unit repeated three times ("abcabcabc") must score below 0.5.
+#[test]
+fn test_repetition_filter_repeated_pattern() {
+    let ctx = FilterContext::default();
+    let conf = RepetitionFilter::new(0.7).calculate_confidence("abcabcabc", &ctx);
+
+    assert!(conf < 0.5, "Repeated pattern should have low confidence");
+}
+
+/// An ordinary word must score above 0.7 under `RepetitionFilter::new(0.7)`.
+#[test]
+fn test_repetition_filter_normal_string() {
+    let ctx = FilterContext::default();
+    let conf = RepetitionFilter::new(0.7).calculate_confidence("Hello", &ctx);
+
+    assert!(conf > 0.7, "Normal string should have high confidence");
+}
+
+/// "Mississippi" repeats letters but is a real word, so it must still score above 0.5.
+#[test]
+fn test_repetition_filter_some_repetition() {
+    let repetition = RepetitionFilter::new(0.7);
+    let ctx = FilterContext::default();
+
+    let conf = repetition.calculate_confidence("Mississippi", &ctx);
+    assert!(
+        conf > 0.5,
+        "Some repetition in legitimate text should be acceptable"
+    );
+}
+
+/// A non-executable, non-writable `StringData` context must score above 0.8.
+#[test]
+fn test_context_filter_string_data_section() {
+    let string_data = FilterContext {
+        section_type: SectionType::StringData,
+        is_executable: false,
+        is_writable: false,
+        ..Default::default()
+    };
+
+    let conf = ContextFilter.calculate_confidence("test", &string_data);
+    assert!(
+        conf > 0.8,
+        "String data section should have high confidence"
+    );
+}
+
+/// A `Code` context with section weight 0.1 must score below 0.5.
+#[test]
+fn test_context_filter_code_section() {
+    let code = FilterContext {
+        section_type: SectionType::Code,
+        section_weight: 0.1,
+        ..Default::default()
+    };
+
+    let conf = ContextFilter.calculate_confidence("test", &code);
+    assert!(conf < 0.5, "Code section should have lower confidence");
+}
+
+/// A `Resources` context must yield exactly the maximum confidence of 1.0.
+#[test]
+fn test_context_filter_resources_section() {
+    let resources = FilterContext {
+        section_type: SectionType::Resources,
+        ..Default::default()
+    };
+
+    let conf = ContextFilter.calculate_confidence("test", &resources);
+    assert_eq!(
+        conf, 1.0,
+        "Resources section should have maximum confidence"
+    );
+}
+
+/// The default composite filter must score ordinary prose above 0.5.
+#[test]
+fn test_composite_filter_legitimate_string() {
+    let defaults = NoiseFilterConfig::default();
+    let composite = CompositeNoiseFilter::new(&defaults);
+
+    let conf = composite.calculate_confidence("Hello, World!", &FilterContext::default());
+    assert!(
+        conf > 0.5,
+        "Legitimate string should have reasonable confidence"
+    );
+}
+
+/// The default composite filter must score a repeated-character run below 0.5.
+#[test]
+fn test_composite_filter_noise() {
+    let defaults = NoiseFilterConfig::default();
+    let composite = CompositeNoiseFilter::new(&defaults);
+
+    let conf = composite.calculate_confidence("AAAA", &FilterContext::default());
+    assert!(conf < 0.5, "Noise should have low confidence");
+}
+
+/// A composite filter built from hand-picked weights (summing to 1.0) yields a positive score.
+#[test]
+fn test_composite_filter_custom_weights() {
+    let entropy_heavy = FilterWeights {
+        entropy_weight: 0.5,
+        char_distribution_weight: 0.3,
+        linguistic_weight: 0.1,
+        length_weight: 0.05,
+        repetition_weight: 0.03,
+        context_weight: 0.02,
+    };
+    let custom = NoiseFilterConfig {
+        filter_weights: entropy_heavy,
+        ..Default::default()
+    };
+
+    let composite = CompositeNoiseFilter::new(&custom);
+    let conf = composite.calculate_confidence("Hello, World!", &FilterContext::default());
+    assert!(conf > 0.0, "Should produce a valid score");
+}
+
+/// With the entropy and linguistic stages switched off, scoring still yields a positive value.
+#[test]
+fn test_composite_filter_enable_disable() {
+    let defaults = NoiseFilterConfig::default();
+    let mut composite = CompositeNoiseFilter::new(&defaults);
+    composite.enable_linguistic = false;
+    composite.enable_entropy = false;
+
+    let conf = composite.calculate_confidence("Hello", &FilterContext::default());
+    assert!(conf > 0.0, "Should work with some filters disabled");
+}
+
+/// Scores realistic samples with the default composite filter and context.
+///
+/// Messages, paths and URLs must score above 0.5, while NUL runs, repeated characters and
+/// punctuation must score below 0.5.
+#[test]
+fn test_real_world_scenarios() {
+    let defaults = NoiseFilterConfig::default();
+    let composite = CompositeNoiseFilter::new(&defaults);
+    let ctx = FilterContext::default();
+
+    // Legitimate strings
+    for text in [
+        "Error: file not found",
+        "Hello, World!",
+        "C:\\Windows\\System32",
+        "https://example.com",
+    ] {
+        let conf = composite.calculate_confidence(text, &ctx);
+        assert!(
+            conf > 0.5,
+            "Legitimate string '{}' should have reasonable confidence",
+            text
+        );
+    }
+
+    // Obvious noise
+    for text in ["\x00\x00\x00\x00", "AAAA", "!!!@@@###", "00000000"] {
+        let conf = composite.calculate_confidence(text, &ctx);
+        assert!(conf < 0.5, "Noise '{}' should have low confidence", text);
+    }
+}