Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/workflows/wasi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,7 @@ jobs:
env:
CARGO_TARGET_WASM32_WASIP1_RUNNER: wasmtime
run: |
cargo test --target wasm32-wasip1 --no-default-features -p uu_echo -p uu_cut
# Get all utilities and exclude ones that don't compile for wasm32-wasip1
EXCLUDE="dd|df|du|env|expr|mktemp|more|tac|test"
UTILS=$(./util/show-utils.sh | tr ' ' '\n' | grep -vE "^($EXCLUDE)$" | sed 's/^/-p uu_/' | tr '\n' ' ')
cargo test --target wasm32-wasip1 --no-default-features $UTILS
31 changes: 30 additions & 1 deletion src/uu/sort/src/chunks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ pub struct ChunkContents<'a> {
pub line_count_hint: usize,
}

#[derive(Debug)]
#[derive(Debug, Default)]
pub struct LineData<'a> {
pub selections: Vec<&'a [u8]>,
pub num_infos: Vec<NumInfo>,
Expand Down Expand Up @@ -431,3 +431,32 @@ fn read_to_buffer<T: Read>(
}
}
}

/// Build the `ChunkContents` for `buffer` by running `parse_lines` over it.
///
/// The result borrows from `buffer` and is shaped for use as the dependent
/// value handed back from the builder closure of `Chunk::try_new`.
/// Only compiled for WASI, where the single-threaded sort path uses it.
///
/// * `buffer` - the raw input bytes to split into lines.
/// * `separator` - the byte that terminates a line.
/// * `settings` - global sort settings forwarded to `parse_lines`.
#[cfg(target_os = "wasi")]
pub fn parse_into_chunk<'a>(
    buffer: &'a [u8],
    separator: u8,
    settings: &GlobalSettings,
) -> ChunkContents<'a> {
    // Start from empty state; `parse_lines` fills every field in place.
    let mut contents = ChunkContents {
        lines: Vec::new(),
        line_data: LineData::default(),
        token_buffer: Vec::new(),
        line_count_hint: 0,
    };
    parse_lines(
        buffer,
        &mut contents.lines,
        &mut contents.line_data,
        &mut contents.token_buffer,
        &mut contents.line_count_hint,
        separator,
        settings,
    );
    contents
}
20 changes: 20 additions & 0 deletions src/uu/sort/src/ext_sort/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

//! External sort: sort large inputs that may not fit in memory.
//!
//! On most platforms this uses a multi-threaded chunked approach with
//! temporary files. On WASI (no threads) we fall back to an in-memory sort.

// Every target except WASI: compile the `threaded` module and re-export its
// `ext_sort` as this module's entry point.
#[cfg(not(target_os = "wasi"))]
mod threaded;
#[cfg(not(target_os = "wasi"))]
pub use threaded::ext_sort;

// WASI only: compile the `wasi` module instead and re-export its `ext_sort`
// under the same name, so callers use `ext_sort` regardless of target.
#[cfg(target_os = "wasi")]
mod wasi;
#[cfg(target_os = "wasi")]
// `self::` needed to disambiguate from the `wasi` crate
pub use self::wasi::ext_sort;
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,15 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

//! Sort big files by using auxiliary files for storing intermediate chunks.
//!
//! Files are read into chunks of memory which are then sorted individually and
//! written to temporary files. There are two threads: One sorter, and one reader/writer.
//! The buffers for the individual chunks are recycled. There are two buffers.
//! Threaded external sort: read input in chunks, sort them in a background
//! thread, and spill to temporary files when memory is exceeded.

use std::cmp::Ordering;
use std::fs::File;
use std::io::{Write, stderr};
use std::io::{Read, Write, stderr};
use std::path::PathBuf;
use std::{
io::Read,
sync::mpsc::{Receiver, SyncSender},
thread,
};
use std::sync::mpsc::{Receiver, SyncSender};
use std::thread;

use itertools::Itertools;
use uucore::error::{UResult, strip_errno};
Expand All @@ -29,17 +23,20 @@ use crate::merge::WriteablePlainTmpFile;
use crate::merge::WriteableTmpFile;
use crate::tmp_dir::TmpDirWrapper;
use crate::{
GlobalSettings,
GlobalSettings, Line,
chunks::{self, Chunk},
compare_by, merge, sort_by,
compare_by, merge, print_sorted, sort_by,
};
use crate::{Line, print_sorted};

// Default buffer size, in bytes.
// Fixed to 8 KiB (equivalent to `std::sys::io::DEFAULT_BUF_SIZE` on most targets).
// Note: update `test_sort::test_start_buffer` if this size is changed.
const DEFAULT_BUF_SIZE: usize = 8 * 1024;

/// Sort files by using auxiliary files for storing intermediate chunks (if needed), and output the result.
///
/// Two threads cooperate: one reads input and writes temporary chunk files,
/// while the other sorts each chunk in memory. Once all chunks are written,
/// they are merged back together for final output.
pub fn ext_sort(
files: &mut impl Iterator<Item = UResult<Box<dyn Read + Send>>>,
settings: &GlobalSettings,
Expand Down
59 changes: 59 additions & 0 deletions src/uu/sort/src/ext_sort/wasi.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

//! WASI single-threaded sort: read all input into memory, sort, and output.
//! Threads are not available on WASI, so we bypass the chunked/threaded path.

use std::cmp::Ordering;
use std::io::Read;

use itertools::Itertools;
use uucore::error::UResult;

use crate::Output;
use crate::chunks::{self, Chunk};
use crate::tmp_dir::TmpDirWrapper;
use crate::{GlobalSettings, compare_by, print_sorted, sort_by};

/// Sort files by reading all input into memory, sorting in a single thread, and outputting directly.
pub fn ext_sort(
files: &mut impl Iterator<Item = UResult<Box<dyn Read + Send>>>,
settings: &GlobalSettings,
output: Output,
_tmp_dir: &mut TmpDirWrapper,
) -> UResult<()> {
let separator = settings.line_ending.into();
// Read all input into memory at once. Unlike the threaded path which uses
// chunked buffered reads, WASI has no threads so we accept the memory cost.
// Note: there is no size limit here — WASI targets are expected to handle
// moderately sized inputs; very large files may cause OOM.
let mut input = Vec::new();
for file in files {
file?.read_to_end(&mut input)?;
}
if input.is_empty() {
return Ok(());
}
let mut chunk = Chunk::try_new(input, |buffer| {
Ok::<_, Box<dyn uucore::error::UError>>(chunks::parse_into_chunk(
buffer, separator, settings,
))
})?;
chunk.with_dependent_mut(|_, contents| {
sort_by(&mut contents.lines, settings, &contents.line_data);
});
if settings.unique {
print_sorted(
chunk.lines().iter().dedup_by(|a, b| {
compare_by(a, b, settings, chunk.line_data(), chunk.line_data()) == Ordering::Equal
}),
settings,
output,
)?;
} else {
print_sorted(chunk.lines().iter(), settings, output)?;
}
Ok(())
}
21 changes: 16 additions & 5 deletions src/uu/sort/src/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2148,11 +2148,22 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
settings.buffer_size_is_explicit = false;
}

let mut tmp_dir = TmpDirWrapper::new(
matches
.get_one::<String>(options::TMP_DIR)
.map_or_else(env::temp_dir, PathBuf::from),
);
let mut tmp_dir = TmpDirWrapper::new(matches.get_one::<String>(options::TMP_DIR).map_or_else(
|| {
// WASI does not support std::env::temp_dir() — it panics with
// "no filesystem on wasm". Use /tmp as a nominal fallback;
// the WASI ext_sort path never actually creates temp files.
#[cfg(target_os = "wasi")]
{
PathBuf::from("/tmp")
}
#[cfg(not(target_os = "wasi"))]
{
env::temp_dir()
}
},
PathBuf::from,
));

settings.compress_prog = matches
.get_one::<String>(options::COMPRESS_PROG)
Expand Down
Loading