Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .vscode/cspell.dictionaries/jargon.wordlist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ autogenerated
autogenerates
bitmask
bitwise
bufferram
bytewise
canonicalization
canonicalize
Expand Down Expand Up @@ -45,6 +46,7 @@ fileio
filesystem
filesystems
flamegraph
freeram
fsxattr
fullblock
getfacl
Expand Down Expand Up @@ -123,6 +125,7 @@ shortcode
shortcodes
siginfo
sigusr
strcasecmp
subcommand
subexpression
submodule
Expand All @@ -134,6 +137,7 @@ syscalls
sysconf
tokenize
toolchain
totalram
truthy
ucase
unbuffered
Expand Down
20 changes: 10 additions & 10 deletions fuzz/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

83 changes: 83 additions & 0 deletions src/uu/sort/src/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,8 @@ struct Precomputed {
num_infos_per_line: usize,
floats_per_line: usize,
selections_per_line: usize,
fast_lexicographic: bool,
fast_ascii_insensitive: bool,
}

impl GlobalSettings {
Expand Down Expand Up @@ -341,6 +343,47 @@ impl GlobalSettings {
.iter()
.filter(|s| matches!(s.settings.mode, SortMode::GeneralNumeric))
.count();

self.precomputed.fast_lexicographic = self.can_use_fast_lexicographic();
self.precomputed.fast_ascii_insensitive = self.can_use_fast_ascii_insensitive();
}

/// Reports whether the plain byte-wise comparison fast path may be used.
///
/// The fast path is allowed only when the global settings use
/// `SortMode::Default` with none of `ignore_case`, `dictionary_order`,
/// `ignore_non_printing` or `ignore_leading_blanks` set, and there is exactly
/// one selector that has `needs_selection == false` and equally plain
/// per-key settings.
fn can_use_fast_lexicographic(&self) -> bool {
    let global_is_plain = self.mode == SortMode::Default
        && !(self.ignore_case
            || self.dictionary_order
            || self.ignore_non_printing
            || self.ignore_leading_blanks);

    // Only index into `selectors` once we know there is exactly one entry.
    if !global_is_plain || self.selectors.len() != 1 {
        return false;
    }

    let selector = &self.selectors[0];
    let key = &selector.settings;
    !selector.needs_selection
        && matches!(key.mode, SortMode::Default)
        && !(key.ignore_case
            || key.dictionary_order
            || key.ignore_non_printing
            || key.ignore_blanks)
}

/// Reports whether the ASCII case-insensitive fast path may be used.
///
/// The requirements mirror the plain lexicographic fast path, except that
/// `ignore_case` must be set both globally and on the single selector:
/// `SortMode::Default`, no `dictionary_order`, `ignore_non_printing` or
/// blank-skipping flags, and a lone selector with `needs_selection == false`.
fn can_use_fast_ascii_insensitive(&self) -> bool {
    let global_is_case_fold_only = self.mode == SortMode::Default
        && self.ignore_case
        && !(self.dictionary_order || self.ignore_non_printing || self.ignore_leading_blanks);

    // Only index into `selectors` once we know there is exactly one entry.
    if !global_is_case_fold_only || self.selectors.len() != 1 {
        return false;
    }

    let selector = &self.selectors[0];
    let key = &selector.settings;
    !selector.needs_selection
        && matches!(key.mode, SortMode::Default)
        && key.ignore_case
        && !(key.dictionary_order || key.ignore_non_printing || key.ignore_blanks)
}
}

Expand Down Expand Up @@ -1643,6 +1686,26 @@ fn compare_by<'a>(
a_line_data: &LineData<'a>,
b_line_data: &LineData<'a>,
) -> Ordering {
if global_settings.precomputed.fast_lexicographic {
let cmp = a.line.cmp(b.line);
return if global_settings.reverse {
cmp.reverse()
} else {
cmp
};
}

if global_settings.precomputed.fast_ascii_insensitive {
let cmp = ascii_case_insensitive_cmp(a.line, b.line);
if cmp != Ordering::Equal || a.line == b.line {
return if global_settings.reverse {
cmp.reverse()
} else {
cmp
};
}
}

let mut selection_index = 0;
let mut num_info_index = 0;
let mut parsed_float_index = 0;
Expand Down Expand Up @@ -1754,6 +1817,26 @@ fn compare_by<'a>(
}
}

/// Compare two byte slices in ASCII case-insensitive order without allocating.
///
/// Every byte is folded to ASCII *uppercase* on the fly before it is compared,
/// matching GNU `sort -f`, which folds lowercase letters to uppercase. The
/// fold direction is observable: the bytes `[`, `\`, `]`, `^`, `_` and `` ` ``
/// sit between `Z` and `a`, so with uppercase folding every letter sorts
/// before them (folding to lowercase would place letters after them).
///
/// Bytes that are not ASCII letters (including `NUL` and bytes >= 0x80) are
/// compared unchanged, and no locale-sensitive routine such as `strcasecmp`
/// is involved. When one slice is a case-insensitive prefix of the other, the
/// shorter slice orders first.
fn ascii_case_insensitive_cmp(a: &[u8], b: &[u8]) -> Ordering {
    // `Iterator::cmp` is a lexicographic comparison that falls back to length
    // when one sequence is a prefix of the other.
    a.iter()
        .map(u8::to_ascii_uppercase)
        .cmp(b.iter().map(u8::to_ascii_uppercase))
}

// This function cleans up the initial comparison done by leading_num_common for a general numeric compare.
// In contrast to numeric compare, GNU general numeric/FP sort *should* recognize positive signs and
// scientific notation, so we strip those lines only after the end of the following numeric string.
Expand Down
116 changes: 90 additions & 26 deletions src/uu/sort/src/tmp_dir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
use std::sync::atomic::{AtomicBool, Ordering};
use std::{
fs::File,
path::{Path, PathBuf},
sync::{Arc, Mutex},
sync::{Arc, Mutex, OnceLock},
};

use tempfile::TempDir;
Expand All @@ -29,6 +30,70 @@ pub struct TmpDirWrapper {
lock: Arc<Mutex<()>>,
}

/// What the shared SIGINT handler should clean up, if anything.
///
/// Both fields are `None` while no temporary directory is registered.
#[derive(Clone, Default)]
struct HandlerRegistration {
    /// Lock of the registered temp-dir wrapper; the handler holds it while
    /// it removes the directory.
    lock: Option<Arc<Mutex<()>>>,
    /// Temporary directory the handler removes before exiting.
    path: Option<PathBuf>,
}

fn handler_state() -> Arc<Mutex<HandlerRegistration>> {
// Lazily create the global HandlerRegistration so all TmpDirWrapper instances and the
// SIGINT handler operate on the same lock/path snapshot.
static HANDLER_STATE: OnceLock<Arc<Mutex<HandlerRegistration>>> = OnceLock::new();
HANDLER_STATE
.get_or_init(|| Arc::new(Mutex::new(HandlerRegistration::default())))
.clone()
}

/// Installs the shared SIGINT handler, at most once per process.
///
/// `state` must originate from `handler_state()` so that the handler always
/// reads the currently registered lock/path pair. On SIGINT the handler takes
/// the registered lock, removes the registered temporary directory (reporting
/// a failure via `show_error!`) and terminates the process with exit code 2.
///
/// A caller that finds the handler already claimed returns `Ok(())`
/// immediately, without waiting for a concurrent installation to finish.
///
/// # Errors
///
/// Returns a `USimpleError` with code 2 when `ctrlc::set_handler` fails; the
/// "installed" flag is reset in that case so a later call can retry.
fn ensure_signal_handler_installed(state: Arc<Mutex<HandlerRegistration>>) -> UResult<()> {
    // Guard to ensure the SIGINT handler is registered once per process and reused.
    static HANDLER_INSTALLED: AtomicBool = AtomicBool::new(false);

    if HANDLER_INSTALLED
        .compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire)
        .is_err()
    {
        return Ok(());
    }

    // `state` is owned and not needed afterwards, so it is moved into the handler directly.
    let installed = ctrlc::set_handler(move || {
        // Load the latest lock/path snapshot so the handler cleans the active temp dir.
        // A poisoned mutex must not panic here: a panic would end the handler before
        // it reaches the cleanup and `exit` below, so recover the guard instead.
        let (lock, path) = {
            let registration = state
                .lock()
                .unwrap_or_else(std::sync::PoisonError::into_inner);
            (registration.lock.clone(), registration.path.clone())
        };

        if let Some(lock) = lock {
            // Hold the wrapper's lock for the whole cleanup (same poison tolerance as above).
            let _guard = lock
                .lock()
                .unwrap_or_else(std::sync::PoisonError::into_inner);
            if let Some(path) = path {
                if let Err(e) = remove_tmp_dir(&path) {
                    show_error!(
                        "{}",
                        translate!(
                            "sort-failed-to-delete-temporary-directory",
                            "error" => e
                        )
                    );
                }
            }
        }

        std::process::exit(2)
    });

    if let Err(e) = installed {
        // Installation failed: release the claim so a subsequent call may try again.
        HANDLER_INSTALLED.store(false, Ordering::Release);
        return Err(USimpleError::new(
            2,
            translate!("sort-failed-to-set-up-signal-handler", "error" => e),
        ));
    }

    Ok(())
}

impl TmpDirWrapper {
pub fn new(path: PathBuf) -> Self {
Self {
Expand All @@ -52,31 +117,14 @@ impl TmpDirWrapper {
);

let path = self.temp_dir.as_ref().unwrap().path().to_owned();
let lock = self.lock.clone();
ctrlc::set_handler(move || {
// Take the lock so that `next_file_path` returns no new file path,
// and the program doesn't terminate before the handler has finished
let _lock = lock.lock().unwrap();
if let Err(e) = remove_tmp_dir(&path) {
show_error!(
"{}",
translate!(
"sort-failed-to-delete-temporary-directory",
"error" => e
)
);
}
std::process::exit(2)
})
.map_err(|e| {
USimpleError::new(
2,
translate!(
"sort-failed-to-set-up-signal-handler",
"error" => e
),
)
})
let state = handler_state();
{
let mut guard = state.lock().unwrap();
guard.lock = Some(self.lock.clone());
guard.path = Some(path);
}

ensure_signal_handler_installed(state)
}

pub fn next_file(&mut self) -> UResult<(File, PathBuf)> {
Expand All @@ -100,6 +148,22 @@ impl TmpDirWrapper {
}
}

impl Drop for TmpDirWrapper {
    /// Clears the shared SIGINT registration if it still refers to this wrapper.
    ///
    /// Ownership is decided by pointer identity of the wrapper's lock
    /// (`Arc::ptr_eq`), so a registration that belongs to a different wrapper
    /// is left untouched.
    fn drop(&mut self) {
        let state = handler_state();
        // A destructor must not panic: panicking while the thread is already
        // unwinding aborts the process. Recover the guard from a poisoned
        // mutex instead of unwrapping.
        let mut registration = state
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner);

        let owns_registration = registration
            .lock
            .as_ref()
            .is_some_and(|current| Arc::ptr_eq(current, &self.lock));

        if owns_registration {
            registration.lock = None;
            registration.path = None;
        }
    }
}

/// Remove the directory at `path` by deleting its child files and then itself.
/// Errors while deleting child files are ignored.
fn remove_tmp_dir(path: &Path) -> std::io::Result<()> {
Expand Down
Loading