Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 28 additions & 129 deletions src/uu/tr/src/operation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use std::{
error::Error,
fmt::{Debug, Display},
io::{BufRead, Write},
ops::Not,
};
use uucore::error::UError;

Expand Down Expand Up @@ -125,6 +126,7 @@ impl Sequence {
pub fn solve_set_characters(
set1_str: &[u8],
set2_str: &[u8],
complement_flag: bool,
truncate_set1_flag: bool,
) -> Result<(Vec<u8>, Vec<u8>), BadSequence> {
let set1 = Self::from_str(set1_str)?;
Expand Down Expand Up @@ -189,6 +191,9 @@ impl Sequence {
},
};
let mut set1_solved: Vec<_> = set1.iter().flat_map(Self::flatten).collect();
if complement_flag {
set1_solved = (0..=u8::MAX).filter(|x| !set1_solved.contains(x)).collect();
}
if truncate_set1_flag {
set1_solved.truncate(set2_solved.len());
}
Comment thread
BenWiederhake marked this conversation as resolved.
Outdated
Expand Down Expand Up @@ -369,56 +374,28 @@ impl<A: SymbolTranslator, B: SymbolTranslator> SymbolTranslator for ChainedSymbo
#[derive(Debug)]
pub struct DeleteOperation {
set: Vec<u8>,
complement_flag: bool,
}

impl DeleteOperation {
pub fn new(set: Vec<u8>, complement_flag: bool) -> Self {
Self {
set,
complement_flag,
}
pub fn new(set: Vec<u8>) -> Self {
Self { set }
}
}

impl SymbolTranslator for DeleteOperation {
fn translate(&mut self, current: u8) -> Option<u8> {
let found = self.set.iter().any(|sequence| *sequence == current);
if self.complement_flag == found {
Some(current)
} else {
None
}
}
}

pub struct TranslateOperationComplement {
iter: u8,
set2_iter: usize,
set1: Vec<u8>,
set2: Vec<u8>,
translation_map: HashMap<u8, u8>,
}

impl TranslateOperationComplement {
fn new(set1: Vec<u8>, set2: Vec<u8>) -> Self {
Self {
iter: 0,
set2_iter: 0,
set1,
set2,
translation_map: HashMap::new(),
}
// keep if not present in the set
self.set.contains(&current).not().then_some(current)
}
}

#[derive(Debug)]
pub struct TranslateOperationStandard {
pub struct TranslateOperation {
translation_map: HashMap<u8, u8>,
}

impl TranslateOperationStandard {
fn new(set1: Vec<u8>, set2: Vec<u8>) -> Result<Self, BadSequence> {
impl TranslateOperation {
pub fn new(set1: Vec<u8>, set2: Vec<u8>) -> Result<Self, BadSequence> {
if let Some(fallback) = set2.last().copied() {
Ok(Self {
translation_map: set1
Expand All @@ -436,122 +413,44 @@ impl TranslateOperationStandard {
}
}

pub enum TranslateOperation {
Standard(TranslateOperationStandard),
Complement(TranslateOperationComplement),
}

impl TranslateOperation {
fn next_complement_char(iter: u8, ignore_list: &[u8]) -> (u8, u8) {
(iter..)
.filter(|c| !ignore_list.iter().any(|s| s == c))
.map(|c| (c + 1, c))
.next()
.expect("exhausted all possible characters")
}
}

impl TranslateOperation {
pub fn new(set1: Vec<u8>, set2: Vec<u8>, complement: bool) -> Result<Self, BadSequence> {
if complement {
Ok(Self::Complement(TranslateOperationComplement::new(
set1, set2,
)))
} else {
Ok(Self::Standard(TranslateOperationStandard::new(set1, set2)?))
}
}
}

impl SymbolTranslator for TranslateOperation {
fn translate(&mut self, current: u8) -> Option<u8> {
match self {
Self::Standard(TranslateOperationStandard { translation_map }) => Some(
translation_map
.iter()
.find_map(|(l, r)| if l.eq(&current) { Some(*r) } else { None })
.unwrap_or(current),
),
Self::Complement(TranslateOperationComplement {
iter,
set2_iter,
set1,
set2,
translation_map,
}) => {
// First, try to see if current char is already mapped
// If so, return the mapped char
// Else, pop from set2
// If we popped something, map the next complement character to this value
// If set2 is empty, we just map the current char directly to fallback --- to avoid looping unnecessarily
if let Some(c) = set1.iter().find(|c| c.eq(&&current)) {
Some(*c)
} else {
while translation_map.get(&current).is_none() {
if let Some(value) = set2.get(*set2_iter) {
let (next_iter, next_key) = Self::next_complement_char(*iter, &*set1);
*iter = next_iter;
*set2_iter = set2_iter.saturating_add(1);
translation_map.insert(next_key, *value);
} else {
translation_map.insert(current, *set2.last().unwrap());
}
}
Some(*translation_map.get(&current).unwrap())
}
}
}
Some(
self.translation_map
.get(&current)
.copied()
.unwrap_or(current),
)
}
}

#[derive(Debug, Clone)]
pub struct SqueezeOperation {
set1: HashSet<u8>,
complement: bool,
previous: Option<u8>,
}

impl SqueezeOperation {
pub fn new(set1: Vec<u8>, complement: bool) -> Self {
pub fn new(set1: Vec<u8>) -> Self {
Self {
set1: set1.into_iter().collect(),
complement,
previous: None,
}
}
}

impl SymbolTranslator for SqueezeOperation {
fn translate(&mut self, current: u8) -> Option<u8> {
if self.complement {
let next = if self.set1.contains(&current) {
Some(current)
} else {
match self.previous {
Some(v) => {
if v.eq(&current) {
None
} else {
Some(current)
}
}
None => Some(current),
}
};
self.previous = Some(current);
next
let next = if self.set1.contains(&current) {
match self.previous {
Some(v) if v == current => None,
_ => Some(current),
}
} else {
let next = if self.set1.contains(&current) {
match self.previous {
Some(v) if v == current => None,
_ => Some(current),
}
} else {
Some(current)
};
self.previous = Some(current);
next
}
Some(current)
};
self.previous = Some(current);
next
}
}

Expand Down
20 changes: 12 additions & 8 deletions src/uu/tr/src/tr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,36 +111,40 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let locked_stdout = stdout.lock();
let mut buffered_stdout = BufWriter::new(locked_stdout);

// According to the man page: translating only happens if not deleting and a second set is given
let translating = !delete_flag && sets.len() > 1;
let mut sets_iter = sets.iter().map(|c| c.as_str());
let (set1, set2) = Sequence::solve_set_characters(
sets_iter.next().unwrap_or_default().as_bytes(),
sets_iter.next().unwrap_or_default().as_bytes(),
truncate_set1_flag,
complement_flag,
// if we are not translating then we don't truncate set1
truncate_set1_flag && translating,
)?;

// '*_op' are the operations that need to be applied, in order.
if delete_flag {
if squeeze_flag {
let delete_op = DeleteOperation::new(set1, complement_flag);
let squeeze_op = SqueezeOperation::new(set2, false);
let delete_op = DeleteOperation::new(set1);
let squeeze_op = SqueezeOperation::new(set2);
let op = delete_op.chain(squeeze_op);
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
} else {
let op = DeleteOperation::new(set1, complement_flag);
let op = DeleteOperation::new(set1);
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
}
} else if squeeze_flag {
if sets_len < 2 {
let op = SqueezeOperation::new(set1, complement_flag);
let op = SqueezeOperation::new(set1);
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
} else {
let translate_op = TranslateOperation::new(set1, set2.clone(), complement_flag)?;
let squeeze_op = SqueezeOperation::new(set2, false);
let translate_op = TranslateOperation::new(set1, set2.clone())?;
let squeeze_op = SqueezeOperation::new(set2);
let op = translate_op.chain(squeeze_op);
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
}
} else {
let op = TranslateOperation::new(set1, set2, complement_flag)?;
let op = TranslateOperation::new(set1, set2)?;
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
}
Ok(())
Expand Down
53 changes: 52 additions & 1 deletion tests/by-util/test_tr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore aabbaa aabbcc aabc abbb abbbcddd abcc abcdefabcdef abcdefghijk abcdefghijklmn abcdefghijklmnop ABCDEFGHIJKLMNOPQRS abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFZZ abcxyz ABCXYZ abcxyzabcxyz ABCXYZABCXYZ acbdef alnum amzamz AMZXAMZ bbbd cclass cefgm cntrl compl dabcdef dncase Gzabcdefg PQRST upcase wxyzz xdigit XXXYYY xycde xyyye xyyz xyzzzzxyzzzz ZABCDEF Zamz Cdefghijkl Cdefghijklmn
// spell-checker:ignore aabbaa aabbcc aabc abbb abbbcddd abcc abcdefabcdef abcdefghijk abcdefghijklmn abcdefghijklmnop ABCDEFGHIJKLMNOPQRS abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFZZ abcxyz ABCXYZ abcxyzabcxyz ABCXYZABCXYZ acbdef alnum amzamz AMZXAMZ bbbd cclass cefgm cntrl compl dabcdef dncase Gzabcdefg PQRST upcase wxyzz xdigit XXXYYY xycde xyyye xyyz xyzzzzxyzzzz ZABCDEF Zamz Cdefghijkl Cdefghijklmn asdfqqwweerr qwerr asdfqwer qwer aassddffqwer asdfqwer
use crate::common::util::TestScenario;

#[test]
Expand Down Expand Up @@ -1313,3 +1313,54 @@ fn check_regression_class_blank() {
.no_stderr()
.stdout_only("a12b");
}

// Check regression found in https://github.com/uutils/coreutils/issues/6163
//
// `-c -t Y Z`: set1 is complemented first and only then truncated to the
// length of set2, so a byte such as 'X' must pass through untouched.
#[test]
fn check_regression_issue_6163_no_match() {
    let argv = ["-c", "-t", "Y", "Z"];
    let piped = "X\n";
    let expected = "X\n";

    new_ucmd!()
        .args(&argv)
        .pipe_in(piped)
        .succeeds()
        .no_stderr()
        .stdout_only(expected);
}
Comment thread
BenWiederhake marked this conversation as resolved.
Outdated

// Counterpart of the "no match" case for issue 6163: with `-c -t Y Z` the
// NUL byte is expected to be translated to 'Z', while the newline is kept.
#[test]
fn check_regression_issue_6163_match() {
    let argv = ["-c", "-t", "Y", "Z"];
    let piped = "\0\n";
    let expected = "Z\n";

    new_ucmd!()
        .args(&argv)
        .pipe_in(piped)
        .succeeds()
        .no_stderr()
        .stdout_only(expected);
}

// `-t` must have no effect when deleting and squeezing: every character of
// "asdf" is deleted (not just the first three), and repeats of the characters
// in "qwe" are squeezed, while the trailing "rr" is left as is.
#[test]
fn check_ignore_truncate_when_deleting_and_squeezing() {
    let argv = ["-dts", "asdf", "qwe"];
    let piped = "asdfqqwweerr\n";
    let expected = "qwerr\n";

    new_ucmd!()
        .args(&argv)
        .pipe_in(piped)
        .succeeds()
        .no_stderr()
        .stdout_only(expected);
}

// `-t` must have no effect when only deleting: all characters of "asdf" are
// removed from the input even though no second set is given.
#[test]
fn check_ignore_truncate_when_deleting() {
    let argv = ["-dt", "asdf"];
    let piped = "asdfqwer\n";
    let expected = "qwer\n";

    new_ucmd!()
        .args(&argv)
        .pipe_in(piped)
        .succeeds()
        .no_stderr()
        .stdout_only(expected);
}

// `-t` must have no effect when only squeezing: repeated characters from
// "asdf" are collapsed to one occurrence each, the rest is passed through.
#[test]
fn check_ignore_truncate_when_squeezing() {
    let argv = ["-ts", "asdf"];
    let piped = "aassddffqwer\n";
    let expected = "asdfqwer\n";

    new_ucmd!()
        .args(&argv)
        .pipe_in(piped)
        .succeeds()
        .no_stderr()
        .stdout_only(expected);
}