Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.90
- uses: dtolnay/rust-toolchain@1.91.0
with:
components: rustfmt, clippy

Expand All @@ -76,7 +76,7 @@ jobs:
- uses: actions/checkout@v5

- name: Setup Rust
uses: dtolnay/rust-toolchain@1.90
uses: dtolnay/rust-toolchain@1.91.0
with:
components: rustfmt, clippy

Expand Down Expand Up @@ -115,7 +115,7 @@ jobs:
- uses: actions/checkout@v5

- name: Setup Rust
uses: dtolnay/rust-toolchain@1.90
uses: dtolnay/rust-toolchain@1.91.0

- name: Install cargo-nextest
uses: taiki-e/install-action@v2
Expand All @@ -135,7 +135,7 @@ jobs:
- uses: actions/checkout@v5

- name: Setup Rust
uses: dtolnay/rust-toolchain@1.90
uses: dtolnay/rust-toolchain@1.91.0
with:
components: llvm-tools

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@ jobs:
- uses: actions/checkout@v5

- name: Setup Rust
uses: dtolnay/rust-toolchain@1.90
uses: dtolnay/rust-toolchain@1.91.0

- uses: github/codeql-action/init@v4
with:
languages: rust
languages: rust

- uses: github/codeql-action/autobuild@v4

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/copilot-setup-steps.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ jobs:
- name: Checkout code
uses: actions/checkout@v5

- uses: dtolnay/rust-toolchain@1.90
- uses: dtolnay/rust-toolchain@1.91.0

- name: Install just task runner
uses: taiki-e/install-action@v2
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:
uses: actions/checkout@v5

- name: Setup Rust
uses: dtolnay/rust-toolchain@1.90
uses: dtolnay/rust-toolchain@1.91.0
with:
components: rustfmt, clippy

Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ jobs:
shell: bash
run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.30.2/cargo-dist-installer.sh | sh"
- name: Cache dist
uses: actions/upload-artifact@v5
uses: actions/upload-artifact@v4
with:
name: cargo-dist-cache
path: ~/.cargo/bin/dist
Expand All @@ -82,7 +82,7 @@ jobs:
cat plan-dist-manifest.json
echo "manifest=$(jq -c "." plan-dist-manifest.json)" >> "$GITHUB_OUTPUT"
- name: "Upload dist-manifest.json"
uses: actions/upload-artifact@v5
uses: actions/upload-artifact@v4
with:
name: artifacts-plan-dist-manifest
path: plan-dist-manifest.json
Expand Down Expand Up @@ -151,7 +151,7 @@ jobs:
dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json
echo "dist ran successfully"
- name: Attest
uses: actions/attest-build-provenance@v3
uses: actions/attest-build-provenance@v2
with:
subject-path: "target/distrib/*${{ join(matrix.targets, ', ') }}*"
- id: cargo-dist
Expand All @@ -168,7 +168,7 @@ jobs:

cp dist-manifest.json "$BUILD_MANIFEST_NAME"
- name: "Upload artifacts"
uses: actions/upload-artifact@v5
uses: actions/upload-artifact@v4
with:
name: artifacts-build-local-${{ join(matrix.targets, '_') }}
path: |
Expand Down Expand Up @@ -233,7 +233,7 @@ jobs:
find . -name '*.cdx.xml' | tee -a "$GITHUB_OUTPUT"
echo "EOF" >> "$GITHUB_OUTPUT"
- name: "Upload artifacts"
uses: actions/upload-artifact@v5
uses: actions/upload-artifact@v4
with:
name: artifacts-build-global
path: |
Expand Down Expand Up @@ -279,7 +279,7 @@ jobs:
cat dist-manifest.json
echo "manifest=$(jq -c "." dist-manifest.json)" >> "$GITHUB_OUTPUT"
- name: "Upload dist-manifest.json"
uses: actions/upload-artifact@v5
uses: actions/upload-artifact@v4
with:
# Overwrite the previous copy
name: artifacts-dist-manifest
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/security.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@ jobs:
- uses: actions/checkout@v5

- name: Setup Rust
uses: dtolnay/rust-toolchain@1.90
uses: dtolnay/rust-toolchain@1.91.0

- uses: taiki-e/install-action@v2
with:
tool: cargo-outdated,cargo-dist

- uses: EmbarkStudios/cargo-deny-action@v2
- uses: EmbarkStudios/cargo-deny-action@v2

- name: Run cargo outdated
run: cargo outdated --depth=1 --exit-code=1
Expand Down
43 changes: 31 additions & 12 deletions .kiro/specs/stringy-binary-analyzer/tasks.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,29 +33,48 @@
- Add unit tests for symbol extraction
- _Requirements: 4.2, 4.3_

- [ ] 4. Implement PE section classification
- [x] 4. Implement PE section classification

- Enhance PE parser to classify sections (.rdata, .data) by string likelihood
- Enhance PE parser to classify sections (.rdata, .data) by string likelihood

- Add section weight assignment for PE-specific sections
- Add section weight assignment for PE-specific sections

- Implement basic PE import/export table parsing
- Implement basic PE import/export table parsing ✅

- Add benchmarks and snapshot tests ✅

- _Requirements: 1.2, 1.4_

- [ ] 4.1 Add PE resource extraction foundation
- _Completed: Issue #3_

- [x] 4.1 Add PE resource extraction foundation

- Add pelite dependency to Cargo.toml
- Implement basic PE resource enumeration
- Create framework for extracting VERSIONINFO and STRINGTABLE resources
- Add pelite dependency to Cargo.toml ✅
- Implement basic PE resource enumeration ✅
- Create framework for extracting VERSIONINFO and STRINGTABLE resources ✅
- Add comprehensive unit tests covering edge cases ✅
- _Requirements: 1.2_
- _Completed: Issue #4 - Phase 1 Foundation_

- [ ] 4.2 Implement PE resource string extraction
- [x] 4.2 Implement PE resource string extraction

- Extract strings from VERSIONINFO resources
- Extract strings from STRINGTABLE resources
- Add manifest resource string extraction
- Extract strings from VERSIONINFO resources ✅
- Extract strings from STRINGTABLE resources ✅
- Add manifest resource string extraction ✅
- Implement UTF-16LE decoding utilities ✅
- Add comprehensive unit tests ✅
- Add integration tests with fixtures ✅
- _Requirements: 1.2_
- _Completed: Issue #5 - Phase 2 String Extraction_

**Implementation Notes:**

- VERSIONINFO: Uses pelite's `version_info()` API to extract all StringFileInfo key-value pairs
- STRINGTABLE: Manual parsing of RT_STRING blocks (16 strings per block, UTF-16LE)
- MANIFEST: Encoding detection (UTF-8/UTF-16LE/UTF-16BE) and XML extraction
- All strings tagged appropriately (`Tag::Version`, `Tag::Manifest`, `Tag::Resource`)
- Graceful error handling throughout (returns empty Vec on errors)
- Test coverage includes both unit tests and integration tests with real fixtures

- [ ] 5. Implement Mach-O section classification

Expand Down
5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ path = "src/main.rs"
[dependencies]
clap = { version = "4.5.51", features = ["derive"] }
goblin = "0.10.3"
pelite = "0.10"
serde = { version = "1.0.228", features = ["derive"] }
serde_json = "1.0"
thiserror = "2.0.17"
Expand All @@ -38,3 +39,7 @@ lto = "thin"
[[bench]]
name = "elf"
harness = false

[[bench]]
name = "pe"
harness = false
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ This project is in active development. Current implementation status:
- ✅ **Container Parsers**: Full section classification with weight-based prioritization
- ✅ **Import/Export Extraction**: Symbol extraction from all supported formats
- ✅ **Section Analysis**: Smart classification of string-rich sections
- ✅ **PE Resource Enumeration**: VERSIONINFO, STRINGTABLE, and MANIFEST resource detection (Phase 1 complete)
- 🚧 **String Extraction**: ASCII/UTF-8 and UTF-16 extraction engines (framework ready)
- 🚧 **Semantic Classification**: URL, domain, path, GUID pattern matching (types defined)
- 🚧 **Ranking System**: Section-aware scoring algorithm (framework in place)
Expand All @@ -188,6 +189,11 @@ The foundation is robust with fully implemented binary format parsers that can:
- PE: `.rdata` (10.0), `.rsrc` (9.0), read-only `.data` (7.0)
- Mach-O: `__TEXT,__cstring` (10.0), `__TEXT,__const` (9.0), `__DATA_CONST` (7.0)
- **Symbol Processing**: Extract and classify import/export names from symbol tables
- **PE Resource Extraction (Phase 1 complete)**:
- VERSIONINFO resource detection
- STRINGTABLE resource detection
- MANIFEST resource detection
- Metadata extraction (type, language, size)
- **Cross-Platform Support**: Handle platform-specific section characteristics and naming
- **Comprehensive Metadata**: Track section offsets, sizes, RVAs, and permissions

Expand Down
105 changes: 105 additions & 0 deletions benches/pe.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
use criterion::{Criterion, criterion_group, criterion_main};
use std::hint::black_box;
use stringy::container::{ContainerParser, PeParser};

/// Benchmarks a complete `PeParser::parse` pass over the PE test fixture.
///
/// The fixture is read from `tests/fixtures/test_binary_pe.exe` under the crate
/// root. If the file cannot be read, or `PeParser::detect` rejects its contents,
/// a message is printed and no benchmark is registered.
fn bench_pe_full_parse(c: &mut Criterion) {
    // Use the PE test fixture
    let fixture_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
        .join("tests")
        .join("fixtures")
        .join("test_binary_pe.exe");

    let data = match std::fs::read(&fixture_path) {
        Ok(data) => data,
        Err(e) => {
            eprintln!("Failed to read PE fixture: {}", e);
            return;
        }
    };

    // Only benchmark if it's actually a PE file
    if !PeParser::detect(&data) {
        println!("PE fixture is not a valid PE file, skipping benchmark");
        return;
    }

    let parser = PeParser::new();
    c.bench_function("pe_full_parse", |b| {
        // Route the parse result through `black_box` so the optimizer cannot
        // treat the (otherwise unused) result as dead and elide parsing work.
        b.iter(|| black_box(parser.parse(black_box(&data))));
    });
}

/// Benchmarks `PeParser::parse` followed by a walk over the extracted imports.
///
/// The fixture is read from `tests/fixtures/test_binary_pe.exe` under the crate
/// root. If the file cannot be read, or `PeParser::detect` rejects its contents,
/// a message is printed and no benchmark is registered.
fn bench_pe_parse_with_imports(c: &mut Criterion) {
    let fixture_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
        .join("tests")
        .join("fixtures")
        .join("test_binary_pe.exe");

    let data = match std::fs::read(&fixture_path) {
        Ok(data) => data,
        Err(e) => {
            eprintln!("Failed to read PE fixture: {}", e);
            return;
        }
    };

    if !PeParser::detect(&data) {
        println!("PE fixture is not a valid PE file, skipping benchmark");
        return;
    }

    let parser = PeParser::new();
    c.bench_function("pe_parse_with_imports", |b| {
        b.iter(|| {
            // Parse failures are ignored on purpose: only the success path is measured.
            if let Ok(container_info) = parser.parse(black_box(&data)) {
                // Access imports to ensure extraction is performed
                let import_count = container_info.imports.len();
                let imports_with_libs = container_info
                    .imports
                    .iter()
                    .filter(|imp| imp.library.is_some())
                    .count();
                // Observe both counts so the traversal above is not optimized away.
                black_box((import_count, imports_with_libs));
            }
        });
    });
}

/// Benchmarks `PeParser::parse` followed by a read of the extracted exports.
///
/// The fixture is read from `tests/fixtures/test_binary_pe.exe` under the crate
/// root. If the file cannot be read, or `PeParser::detect` rejects its contents,
/// a message is printed and no benchmark is registered.
fn bench_pe_parse_with_exports(c: &mut Criterion) {
    let fixture_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
        .join("tests")
        .join("fixtures")
        .join("test_binary_pe.exe");

    let data = match std::fs::read(&fixture_path) {
        Ok(data) => data,
        Err(e) => {
            eprintln!("Failed to read PE fixture: {}", e);
            return;
        }
    };

    if !PeParser::detect(&data) {
        println!("PE fixture is not a valid PE file, skipping benchmark");
        return;
    }

    let parser = PeParser::new();
    c.bench_function("pe_parse_with_exports", |b| {
        b.iter(|| {
            // Parse failures are ignored on purpose: only the success path is measured.
            if let Ok(container_info) = parser.parse(black_box(&data)) {
                // Access exports to ensure extraction is performed; `black_box`
                // keeps the read from being discarded by the optimizer.
                black_box(container_info.exports.len());
            }
        });
    });
}

// Collect the three PE benchmark functions into the `pe_benches` group and
// pass that group to `criterion_main!`.
// NOTE(review): presumably relies on `harness = false` for the `pe` bench
// target in Cargo.toml so Criterion can supply the entry point — confirm.
criterion_group!(
pe_benches,
bench_pe_full_parse,
bench_pe_parse_with_imports,
bench_pe_parse_with_exports
);
criterion_main!(pe_benches);
Loading
Loading