Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 13 additions & 13 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ jobs:
shell: bash
run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.30.2/cargo-dist-installer.sh | sh"
- name: Cache dist
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v5
with:
name: cargo-dist-cache
path: ~/.cargo/bin/dist
Expand All @@ -82,7 +82,7 @@ jobs:
cat plan-dist-manifest.json
echo "manifest=$(jq -c "." plan-dist-manifest.json)" >> "$GITHUB_OUTPUT"
- name: "Upload dist-manifest.json"
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v5
with:
name: artifacts-plan-dist-manifest
path: plan-dist-manifest.json
Expand Down Expand Up @@ -135,7 +135,7 @@ jobs:
run: ${{ matrix.install_dist.run }}
# Get the dist-manifest
- name: Fetch local artifacts
uses: actions/download-artifact@v5
uses: actions/download-artifact@v6
with:
pattern: artifacts-*
path: target/distrib/
Expand All @@ -151,7 +151,7 @@ jobs:
dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json
echo "dist ran successfully"
- name: Attest
uses: actions/attest-build-provenance@v2
uses: actions/attest-build-provenance@v3
with:
subject-path: "target/distrib/*${{ join(matrix.targets, ', ') }}*"
- id: cargo-dist
Expand All @@ -168,7 +168,7 @@ jobs:

cp dist-manifest.json "$BUILD_MANIFEST_NAME"
- name: "Upload artifacts"
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v5
with:
name: artifacts-build-local-${{ join(matrix.targets, '_') }}
path: |
Expand All @@ -190,7 +190,7 @@ jobs:
persist-credentials: false
submodules: recursive
- name: Install cached dist
uses: actions/download-artifact@v5
uses: actions/download-artifact@v6
with:
name: cargo-dist-cache
path: ~/.cargo/bin/
Expand All @@ -202,7 +202,7 @@ jobs:
shell: bash
# Get all the local artifacts for the global tasks to use (for e.g. checksums)
- name: Fetch local artifacts
uses: actions/download-artifact@v5
uses: actions/download-artifact@v6
with:
pattern: artifacts-*
path: target/distrib/
Expand Down Expand Up @@ -233,7 +233,7 @@ jobs:
find . -name '*.cdx.xml' | tee -a "$GITHUB_OUTPUT"
echo "EOF" >> "$GITHUB_OUTPUT"
- name: "Upload artifacts"
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v5
with:
name: artifacts-build-global
path: |
Expand All @@ -259,14 +259,14 @@ jobs:
persist-credentials: false
submodules: recursive
- name: Install cached dist
uses: actions/download-artifact@v5
uses: actions/download-artifact@v6
with:
name: cargo-dist-cache
path: ~/.cargo/bin/
- run: chmod +x ~/.cargo/bin/dist
# Fetch artifacts from scratch-storage
- name: Fetch artifacts
uses: actions/download-artifact@v5
uses: actions/download-artifact@v6
with:
pattern: artifacts-*
path: target/distrib/
Expand All @@ -279,14 +279,14 @@ jobs:
cat dist-manifest.json
echo "manifest=$(jq -c "." dist-manifest.json)" >> "$GITHUB_OUTPUT"
- name: "Upload dist-manifest.json"
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v5
with:
# Overwrite the previous copy
name: artifacts-dist-manifest
path: dist-manifest.json
# Create a GitHub Release while uploading all files to it
- name: "Download GitHub Artifacts"
uses: actions/download-artifact@v5
uses: actions/download-artifact@v6
with:
pattern: artifacts-*
path: artifacts
Expand Down Expand Up @@ -326,7 +326,7 @@ jobs:
token: ${{ secrets.HOMEBREW_TAP_TOKEN }}
# So we have access to the formula
- name: Fetch homebrew formulae
uses: actions/download-artifact@v5
uses: actions/download-artifact@v6
with:
pattern: artifacts-*
path: Formula/
Expand Down
13 changes: 9 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,17 @@ path = "src/main.rs"

[dependencies]
clap = { version = "4.5.51", features = ["derive"] }
entropy = "0.4.2"
goblin = "0.10.3"
pelite = "0.10"
pelite = "0.10.0"
serde = { version = "1.0.228", features = ["derive"] }
serde_json = "1.0"
serde_json = "1.0.145"
thiserror = "2.0.17"

[dev-dependencies]
criterion = "0.7.0"
insta = "1.43"
tempfile = "3.23"
insta = "1.43.2"
tempfile = "3.23.0"

# The profile that 'dist' will build with
[profile.dist]
Expand All @@ -43,3 +44,7 @@ harness = false
[[bench]]
name = "pe"
harness = false

[[bench]]
name = "ascii_extraction"
harness = false
203 changes: 203 additions & 0 deletions benches/ascii_extraction.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
use criterion::{Criterion, criterion_group, criterion_main};
use std::hint::black_box;
use stringy::extraction::ascii::{AsciiExtractionConfig, extract_ascii_strings};
use stringy::extraction::config::NoiseFilterConfig;
use stringy::extraction::filters::{CompositeNoiseFilter, FilterContext};

/// Benchmarks plain ASCII string extraction with the default extraction config.
///
/// The input is a short byte pattern (NUL-separated words plus a few
/// non-printable bytes) repeated 100 times. No noise filtering is applied.
fn bench_basic_extraction(c: &mut Criterion) {
    // Create test data with various string patterns
    let test_data =
        b"Hello World\0Test String\0Another String\0Binary\x00\x01\x02Data\0More Strings\0"
            .repeat(100);
    let config = AsciiExtractionConfig::default();

    c.bench_function("ascii_extraction_basic", |b| {
        // Return the result instead of discarding it: Criterion passes the
        // closure's return value through `black_box`, so the extraction cannot
        // be eliminated as dead code.
        b.iter(|| extract_ascii_strings(black_box(&test_data), black_box(&config)));
    });
}

/// Benchmarks ASCII extraction followed by a confidence calculation for every
/// extracted string, using the default extraction and noise-filter configs.
///
/// Uses the same repeated byte pattern as the basic extraction benchmark.
fn bench_filtered_extraction(c: &mut Criterion) {
    let test_data =
        b"Hello World\0Test String\0Another String\0Binary\x00\x01\x02Data\0More Strings\0"
            .repeat(100);
    let config = AsciiExtractionConfig::default();
    let filter_config = NoiseFilterConfig::default();
    let filter = CompositeNoiseFilter::new(&filter_config);
    let context = FilterContext::default();

    c.bench_function("ascii_extraction_with_filtering", |b| {
        b.iter(|| {
            let strings = extract_ascii_strings(black_box(&test_data), black_box(&config));
            for string in &strings {
                // Black-box the result so the confidence computation is kept
                // alive; a discarded value could be optimized away.
                black_box(
                    filter.calculate_confidence(black_box(&string.text), black_box(&context)),
                );
            }
        });
    });
}

/// Benchmarks each individual noise filter in isolation.
///
/// Every benchmark runs one filter's `calculate_confidence` over the same
/// small set of sample strings with a default `FilterContext`. Results are
/// passed through `black_box` so the computations cannot be optimized away.
fn bench_individual_filters(c: &mut Criterion) {
    use stringy::extraction::filters::{
        CharDistributionFilter, ContextFilter, EntropyFilter, LengthFilter, LinguisticFilter,
        NoiseFilter, RepetitionFilter,
    };

    // Fixed sample set; an array avoids a needless heap allocation.
    let test_strings = [
        "Hello, World!",
        "AAAA",
        "Error: file not found",
        "!!!@@@###",
        "C:\\Windows\\System32",
    ];

    let char_filter = CharDistributionFilter;
    let entropy_filter = EntropyFilter::new(1.5, 7.5);
    let linguistic_filter = LinguisticFilter::new(0.1, 0.9);
    let length_filter = LengthFilter::new(200);
    let repetition_filter = RepetitionFilter::new(0.7);
    let context_filter = ContextFilter;
    let context = FilterContext::default();

    c.bench_function("filter_char_distribution", |b| {
        b.iter(|| {
            for text in &test_strings {
                black_box(
                    char_filter.calculate_confidence(black_box(text), black_box(&context)),
                );
            }
        });
    });

    c.bench_function("filter_entropy", |b| {
        b.iter(|| {
            for text in &test_strings {
                black_box(
                    entropy_filter.calculate_confidence(black_box(text), black_box(&context)),
                );
            }
        });
    });

    c.bench_function("filter_linguistic", |b| {
        b.iter(|| {
            for text in &test_strings {
                black_box(
                    linguistic_filter.calculate_confidence(black_box(text), black_box(&context)),
                );
            }
        });
    });

    c.bench_function("filter_length", |b| {
        b.iter(|| {
            for text in &test_strings {
                black_box(
                    length_filter.calculate_confidence(black_box(text), black_box(&context)),
                );
            }
        });
    });

    c.bench_function("filter_repetition", |b| {
        b.iter(|| {
            for text in &test_strings {
                black_box(
                    repetition_filter.calculate_confidence(black_box(text), black_box(&context)),
                );
            }
        });
    });

    c.bench_function("filter_context", |b| {
        b.iter(|| {
            for text in &test_strings {
                black_box(
                    context_filter.calculate_confidence(black_box(text), black_box(&context)),
                );
            }
        });
    });
}

/// Benchmarks the composite noise filter over a small set of sample strings.
///
/// Two benchmark IDs are registered. Both currently build the filter from
/// `NoiseFilterConfig::default()`; see the note on the second one.
fn bench_composite_filter(c: &mut Criterion) {
    // Fixed sample set; an array avoids a needless heap allocation.
    let test_strings = [
        "Hello, World!",
        "AAAA",
        "Error: file not found",
        "!!!@@@###",
        "C:\\Windows\\System32",
        "https://example.com",
    ];

    let filter_config = NoiseFilterConfig::default();
    let filter = CompositeNoiseFilter::new(&filter_config);
    let context = FilterContext::default();

    c.bench_function("composite_filter_all_enabled", |b| {
        b.iter(|| {
            for text in &test_strings {
                // Black-box the result so the computation is not optimized away.
                black_box(filter.calculate_confidence(black_box(text), black_box(&context)));
            }
        });
    });

    // Placeholder for a partially-enabled configuration.
    // NOTE: this filter is built from the same default config as `filter` above,
    // so for now this benchmark measures the same work as
    // `composite_filter_all_enabled`.
    // TODO: build it from a config with some filters disabled.
    let filter_partial = CompositeNoiseFilter::new(&filter_config);

    c.bench_function("composite_filter_partial", |b| {
        b.iter(|| {
            for text in &test_strings {
                black_box(
                    filter_partial.calculate_confidence(black_box(text), black_box(&context)),
                );
            }
        });
    });
}

/// Benchmarks the `entropy` crate's `shannon_entropy` on the raw bytes of a
/// few short sample strings.
fn bench_entropy_calculation(c: &mut Criterion) {
    use entropy::shannon_entropy;

    // Fixed sample set; an array avoids a needless heap allocation.
    let test_strings = [
        "Hello, World!",
        "AAAA",
        "Error: file not found",
        "!!!@@@###",
    ];

    c.bench_function("entropy_shannon_calculation", |b| {
        b.iter(|| {
            for text in &test_strings {
                // Black-box the result so the entropy computation is not
                // discarded by the optimizer.
                black_box(shannon_entropy(black_box(text.as_bytes())));
            }
        });
    });
}

/// Benchmarks extraction plus confidence calculation on a larger, mostly
/// binary buffer with embedded strings.
///
/// The buffer is built from 10,000 loop steps: every 100th step appends
/// `"Hello World\0"`, every other step appends a single byte equal to the
/// step index modulo 256.
fn bench_large_binary(c: &mut Criterion) {
    // Create a large binary-like data with embedded strings
    let mut large_data = Vec::new();
    for i in 0..10_000 {
        if i % 100 == 0 {
            large_data.extend_from_slice(b"Hello World\0");
        } else {
            // `i` is non-negative, so `i % 256` is always in 0..=255 and the
            // cast cannot truncate.
            large_data.push((i % 256) as u8);
        }
    }

    let config = AsciiExtractionConfig::default();
    let filter_config = NoiseFilterConfig::default();
    let filter = CompositeNoiseFilter::new(&filter_config);
    let context = FilterContext::default();

    c.bench_function("large_binary_extraction", |b| {
        b.iter(|| {
            let strings = extract_ascii_strings(black_box(&large_data), black_box(&config));
            for string in &strings {
                // Black-box the result so the confidence computation is kept
                // alive; a discarded value could be optimized away.
                black_box(
                    filter.calculate_confidence(black_box(&string.text), black_box(&context)),
                );
            }
        });
    });
}

// Register every benchmark in this file under one group, using Criterion's
// default configuration, and generate the benchmark binary's entry point.
criterion_group! {
    name = ascii_extraction_benches;
    config = Criterion::default();
    targets =
        bench_basic_extraction,
        bench_filtered_extraction,
        bench_individual_filters,
        bench_composite_filter,
        bench_entropy_calculation,
        bench_large_binary
}
criterion_main!(ascii_extraction_benches);
12 changes: 10 additions & 2 deletions dist-workspace.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,13 @@ ci = "github"
# The installers to generate for each app
installers = ["shell", "powershell", "homebrew"]
# Target platforms to build apps for (Rust target-triple syntax)
targets = ["aarch64-apple-darwin", "aarch64-unknown-linux-gnu", "x86_64-unknown-linux-gnu", "x86_64-unknown-linux-musl", "x86_64-pc-windows-msvc"]
targets = [
"aarch64-apple-darwin",
"aarch64-unknown-linux-gnu",
"x86_64-unknown-linux-gnu",
"x86_64-unknown-linux-musl",
"x86_64-pc-windows-msvc",
]
# A GitHub repo to push Homebrew formulas to
tap = "EvilBit-Labs/homebrew-tap"
# Customize the Homebrew formula name
Expand Down Expand Up @@ -49,4 +55,6 @@ install-success-msg = "Successfully installed Stringy! Ready to start looking at
repository = "EvilBit-Labs/StringyMcStringFace"
[dist.github-action-commits]
"actions/checkout" = "v5"
"actions/download-artifact" = "v5"
"actions/download-artifact" = "v6"
"actions/upload-artifact" = "v5"
"actions/attest-build-provenance" = "v3"
Loading
Loading