Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions changelog/std-string-strip.dd
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
`std.string.strip` now accepts a string of characters to be stripped

New overload functions of $(REF strip, std, string),
$(REF stripLeft, std, string) and
$(REF stripRight, std, string) accepts a string of characters to be
stripped instead of stripping only whitespace.

---
import std.string: stripLeft, stripRight, strip;

assert(stripLeft("www.dlang.org", "w.") == "dlang.org");
assert(stripRight("dlang.org/", "/") == "dlang.org");
assert(strip("www.dlang.org/", "w./") == "dlang.org");
---
249 changes: 243 additions & 6 deletions std/string.d
Original file line number Diff line number Diff line change
Expand Up @@ -2846,13 +2846,16 @@ if (isSomeChar!C)
}

/++
Strips leading whitespace (as defined by $(REF isWhite, std,uni)).
Strips leading whitespace (as defined by $(REF isWhite, std,uni)) or
as specified in the second argument.

Params:
input = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
of characters
chars = string of characters to be stripped

Returns: $(D input) stripped of leading whitespace.
Returns: $(D input) stripped of leading whitespace or characters
specified in the second argument.

Postconditions: $(D input) and the returned value
will share the same tail (see $(REF sameTail, std,array)).
Expand Down Expand Up @@ -2920,14 +2923,72 @@ if (isConvertibleToString!Range)
assert(testAliasedString!stripLeft(" hello"));
}

/// Ditto
auto stripLeft(Range, Char)(Range input, const(Char)[] chars)
if (((isForwardRange!Range && isSomeChar!(ElementEncodingType!Range)) ||
isConvertibleToString!Range) && isSomeChar!Char)
{
static if (isConvertibleToString!Range)
return stripLeft!(StringTypeOf!Range)(input, chars);
else
{
for (; !input.empty; input.popFront)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why don't you use foreach here?
In this case there's any performance penalty of this abstraction.
#6092 (comment)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the catch here is that we need to iterate from the back, not from the front. So we'll either have to use foreach_reverse (ick), or std.range.retro, which then would require passing the range in by reference, since we need to guarantee that the range is actually modified in order to return it to the user. So it'll end up being something ugly like this:

import std.range : retro;
foreach (ch; (&input).retro) { ... }

which is equally ick.

Copy link
Copy Markdown
Member

@quickfur quickfur Jan 31, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh wait, what I said only applies to the stripRight variant. Here we're moving forwards, so there's no foreach_reverse or retro needed.

I'd say the for loop here is justified (using foreach would require passing the range by ref, just in case, so it still entails the ugly &range syntax).

Or perhaps a more self-documenting way to write this would be:

while (!input.empty && chars.indexOf(input.front) != -1)
    input.popFront;

which, at least to me, reads more intuitively as "while the range is not empty and the front element is a character to be stripped, pop the front element".

Copy link
Copy Markdown
Contributor

@wilzbach wilzbach Jan 31, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, it boils down to whether we want to do auto-decoding or not ... which after thinking about this is probably better here because the user-input of chars could consist of utf-8 chars.

{
if (chars.indexOf(input.front) == -1)
break;
}
return input;
}
}

///
@safe pure unittest
{
assert(stripLeft(" hello world ", " ") ==
"hello world ");
assert(stripLeft("xxxxxhello world ", "x") ==
"hello world ");
assert(stripLeft("xxxyy hello world ", "xy ") ==
"hello world ");
}

///
@safe pure unittest
{
import std.array : array;
import std.utf : byChar, byWchar, byDchar;

assert(stripLeft(" xxxyy hello world "w.byChar, "xy ").array ==
"hello world ");

assert(stripLeft("\u2028\u2020hello world\u2028"w.byWchar,
"\u2028").array == "\u2020hello world\u2028");
assert(stripLeft("\U00010001hello world"w.byWchar, " ").array ==
"\U00010001hello world"w);
assert(stripLeft("\U00010001 xyhello world"d.byDchar,
"\U00010001 xy").array == "hello world"d);

assert(stripLeft("\u2020hello"w, "\u2020"w) == "hello"w);
assert(stripLeft("\U00010001hello"d, "\U00010001"d) == "hello"d);
assert(stripLeft(" hello ", "") == " hello ");
}

@safe pure unittest
{
assert(testAliasedString!stripLeft(" xyz hello", "xyz "));
}

/++
Strips trailing whitespace (as defined by $(REF isWhite, std,uni)).
Strips trailing whitespace (as defined by $(REF isWhite, std,uni)) or
as specified in the second argument.

Params:
str = string or random access range of characters
chars = string of characters to be stripped

Returns:
slice of $(D str) stripped of trailing whitespace.
slice of $(D str) stripped of trailing whitespace or characters
specified in the second argument.

See_Also:
Generic stripping on ranges: $(REF _stripRight, std, algorithm, mutation)
Expand Down Expand Up @@ -3080,16 +3141,73 @@ if (isConvertibleToString!Range)
cast(void) stripRight(ws.byUTF!wchar).array;
}

/// Ditto
auto stripRight(Range, Char)(Range str, const(Char)[] chars)
if (((isBidirectionalRange!Range && isSomeChar!(ElementEncodingType!Range)) ||
isConvertibleToString!Range) && isSomeChar!Char)
{
static if (isConvertibleToString!Range)
return stripRight!(StringTypeOf!Range)(str, chars);
else
{
for (; !str.empty; str.popBack)
{
if (chars.indexOf(str.back) == -1)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why don't you use foreach here?
In this case there's any performance penalty of this abstraction.
#6092 (comment)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here we're iterating from the back, so we'd need foreach_reverse or retro if you wanted to use an abstraction.

Or we could write this as:

while (!str.empty && chars.indexOf(str.back) != -1)
    str.popBack;

which to me reads more intuitively as "while the range is not empty and the back is a character to the stripped, pop the back".

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall I change this to use while as suggested?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not important, but I like the for form. Kurz und gut.

Copy link
Copy Markdown
Contributor

@wilzbach wilzbach Jan 31, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not important, but I like the for form. Kurz und gut.

Yes, I was referring to foreach_reverse:

foreach_reverse (s; str)
    if (chars.indexOf(s) == -1)
        break;

FWIW LDC optimizes foreach_reverse a lot better as it avoid auto-decoding:

https://godbolt.org/g/k9tr4S

_D7example3fooFAyaQdZv:
  sub rsp, 72
  mov qword ptr [rsp + 64], rcx
  mov qword ptr [rsp + 56], rdx
  mov qword ptr [rsp + 40], rdi
  mov qword ptr [rsp + 48], rsi
  mov rcx, qword ptr [rsp + 64]
  mov rdx, qword ptr [rsp + 56]
  mov qword ptr [rsp + 24], rdx
  mov qword ptr [rsp + 32], rcx
  mov rcx, qword ptr [rsp + 24]
  mov qword ptr [rsp + 16], rcx
.LBB0_1:
  mov rax, qword ptr [rsp + 16]
  mov rcx, rax
  sub rcx, 1
  mov qword ptr [rsp + 16], rcx
  cmp rax, 0
  je .LBB0_6
  mov eax, 1
  mov rcx, qword ptr [rsp + 16]
  mov rdx, qword ptr [rsp + 32]
  mov sil, byte ptr [rdx + rcx]
  mov byte ptr [rsp + 15], sil
  movzx esi, byte ptr [rsp + 15]
  mov rdx, qword ptr [rsp + 40]
  mov rcx, qword ptr [rsp + 48]
  mov edi, 1
  mov dword ptr [rsp + 8], eax
  call _D3std6string__T7indexOfTAyaZQnFNaNbNiNfQpxwxEQBs8typecons__T4FlagVQBqa13_6361736553656e736974697665ZQBoZl@PLT
  cmp rax, -1
  jne .LBB0_4
  jmp .LBB0_6
.LBB0_4:
  jmp .LBB0_5
.LBB0_5:
  jmp .LBB0_1
.LBB0_6:
  add rsp, 72
  ret

I hope you notice that it could inline back completely.

For comparison, the for:

_D7example3fooFAyaQdZv:
  push r15
  push r14
  push r12
  push rbx
  sub rsp, 24
  mov r14, rsi
  mov r12, rdi
  mov qword ptr [rsp + 8], rdx
  mov qword ptr [rsp + 16], rcx
  test rdx, rdx
  je .LBB0_9
  mov rdi, rdx
  mov rsi, rcx
  call _D3std5range10primitives__T4backTyaZQjFNaNdNfAyaZw@PLT
  mov edi, 1
  mov esi, eax
  mov rdx, r12
  mov rcx, r14
  call _D3std6string__T7indexOfTAyaZQnFNaNbNiNfQpxwxEQBs8typecons__T4FlagVQBqa13_6361736553656e736974697665ZQBoZl@PLT
  cmp rax, -1
  je .LBB0_9
  lea r15, [rsp + 8]
.LBB0_3:
  mov rbx, qword ptr [rsp + 8]
  test rbx, rbx
  je .LBB0_6
  mov rdi, rbx
  mov rsi, r15
  call _D3std3utf__T10strideBackTAyaZQrFNaNfKQmmZk@PLT
  mov eax, eax
  sub rbx, rax
  cmp rbx, qword ptr [rsp + 8]
  ja .LBB0_5
  mov qword ptr [rsp + 8], rbx
  test rbx, rbx
  je .LBB0_9
  mov rsi, qword ptr [rsp + 16]
  mov rdi, rbx
  call _D3std5range10primitives__T4backTyaZQjFNaNdNfAyaZw@PLT
  mov edi, 1
  mov esi, eax
  mov rdx, r12
  mov rcx, r14
  call _D3std6string__T7indexOfTAyaZQnFNaNbNiNfQpxwxEQBs8typecons__T4FlagVQBqa13_6361736553656e736974697665ZQBoZl@PLT
  cmp rax, -1
  jne .LBB0_3
.LBB0_9:
  add rsp, 24
  pop rbx
  pop r12
  pop r14
  pop r15
  ret
.LBB0_6:
  lea rsi, [rip + .L.str]
  lea rcx, [rip + .L.str.1]
  mov edi, 69
  mov edx, 92
  mov r8d, 2206
  call _d_assert_msg@PLT
.LBB0_5:
  lea rsi, [rip + .L.str.1]
  mov edi, 92
  mov edx, 2207
  call _d_arraybounds@PLT

If we would have perfect abstraction, I would have suggested str.retro.until!(s => chars.indexOf(s) > 0);, but unfortunately that's - even for the LLVM optimizer - too much.

edit: after thinking about this, it's is probably better here to do auto-decoding there because the user-input of chars could consist of utf-8 chars :/

break;
}
return str;
}
}

///
@safe pure
unittest
{
assert(stripRight(" hello world ", "x") ==
" hello world ");
assert(stripRight(" hello world ", " ") ==
" hello world");
assert(stripRight(" hello worldxy ", "xy ") ==
" hello world");
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same goes for stripRight. The original overload + unittests should be left as-is in their original order, and the new stripRight should be added after the original unittest, and the new tests should follow, in their own unittest block.

}

@safe pure unittest
{
assert(testAliasedString!stripRight("hello xyz ", "xyz "));
}

@safe pure unittest
{
import std.array : array;
import std.utf : byChar, byDchar, byUTF, byWchar;

assert(stripRight(" hello world xyz ".byChar,
"xyz ").array == " hello world");
assert(stripRight("\u2028hello world\u2020\u2028"w.byWchar,
"\u2028").array == "\u2028hello world\u2020");
assert(stripRight("hello world\U00010001"w.byWchar,
" ").array == "hello world\U00010001"w);
assert(stripRight("hello world\U00010001 xy"d.byDchar,
"\U00010001 xy").array == "hello world"d);
assert(stripRight("hello\u2020"w, "\u2020"w) == "hello"w);
assert(stripRight("hello\U00010001"d, "\U00010001"d) == "hello"d);
assert(stripRight(" hello ", "") == " hello ");
}


/++
Strips both leading and trailing whitespace (as defined by
$(REF isWhite, std,uni)).
$(REF isWhite, std,uni)) or as specified in the second argument.

Params:
str = string or random access range of characters
chars = string of characters to be stripped
leftChars = string of leading characters to be stripped
rightChars = string of trailing characters to be stripped

Returns:
slice of $(D str) stripped of leading and trailing whitespace.
slice of $(D str) stripped of leading and trailing whitespace
or characters as specified in the second argument.

See_Also:
Generic stripping on ranges: $(REF _strip, std, algorithm, mutation)
Expand Down Expand Up @@ -3177,6 +3295,125 @@ if (isConvertibleToString!Range)
});
}

/// Ditto
auto strip(Range, Char)(Range str, const(Char)[] chars)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An additional overload would take a third argument:

auto strip(Range, Char)(Range str, const(Char)[] leftChars, const(Char)[] rightChars)

and would strip leftChars on the left and rightChars on the right.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice. I will add it

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added the new overload as suggested and added the respective tests.

if (((isBidirectionalRange!Range && isSomeChar!(ElementEncodingType!Range)) ||
isConvertibleToString!Range) && isSomeChar!Char)
{
static if (isConvertibleToString!Range)
return strip!(StringTypeOf!Range)(str, chars);
else
return stripRight(stripLeft(str, chars), chars);
}

///
@safe pure unittest
{
assert(strip(" hello world ", "x") ==
" hello world ");
assert(strip(" hello world ", " ") ==
"hello world");
assert(strip(" xyxyhello worldxyxy ", "xy ") ==
"hello world");
assert(strip("\u2020hello\u2020"w, "\u2020"w) == "hello"w);
assert(strip("\U00010001hello\U00010001"d, "\U00010001"d) == "hello"d);
assert(strip(" hello ", "") == " hello ");
}

@safe pure unittest
{
assert(testAliasedString!strip(" xyz hello world xyz ", "xyz "));
}

/// Ditto
auto strip(Range, Char)(Range str, const(Char)[] leftChars, const(Char)[] rightChars)
if (((isBidirectionalRange!Range && isSomeChar!(ElementEncodingType!Range)) ||
isConvertibleToString!Range) && isSomeChar!Char)
{
static if (isConvertibleToString!Range)
return strip!(StringTypeOf!Range)(str, leftChars, rightChars);
else
return stripRight(stripLeft(str, leftChars), rightChars);
}

///
@safe pure unittest
{
assert(strip("xxhelloyy", "x", "y") == "hello");
assert(strip(" xyxyhello worldxyxyzz ", "xy ", "xyz ") ==
"hello world");
assert(strip("\u2020hello\u2028"w, "\u2020"w, "\u2028"w) == "hello"w);
assert(strip("\U00010001hello\U00010002"d, "\U00010001"d, "\U00010002"d) ==
"hello"d);
assert(strip(" hello ", "", "") == " hello ");
}

@safe pure unittest
{
assert(testAliasedString!strip(" xy hello world pq ", "xy ", "pq "));
}

@safe pure unittest
{
import std.algorithm.comparison : equal;
import std.conv : to;
import std.exception : assertCTFEable;

assertCTFEable!(
{
static foreach (S; AliasSeq!( char[], const char[], string,
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Come to think of it, this should also be indented too.

wchar[], const wchar[], wstring,
dchar[], const dchar[], dstring))
{
assert(equal(stripLeft(to!S(" \tfoo\t "), "\t "), "foo\t "));
assert(equal(stripLeft(to!S("\u2008 foo\t \u2007"), "\u2008 "),
"foo\t \u2007"));
assert(equal(stripLeft(to!S("\u0085 μ \u0085 \u00BB \r"), "\u0085 "),
"μ \u0085 \u00BB \r"));
assert(equal(stripLeft(to!S("1"), " "), "1"));
assert(equal(stripLeft(to!S("\U0010FFFE"), " "), "\U0010FFFE"));
assert(equal(stripLeft(to!S(""), " "), ""));

assert(equal(stripRight(to!S(" foo\t "), "\t "), " foo"));
assert(equal(stripRight(to!S("\u2008 foo\t \u2007"), "\u2007\t "),
"\u2008 foo"));
assert(equal(stripRight(to!S("\u0085 μ \u0085 \u00BB \r"), "\r "),
"\u0085 μ \u0085 \u00BB"));
assert(equal(stripRight(to!S("1"), " "), "1"));
assert(equal(stripRight(to!S("\U0010FFFE"), " "), "\U0010FFFE"));
assert(equal(stripRight(to!S(""), " "), ""));

assert(equal(strip(to!S(" foo\t "), "\t "), "foo"));
assert(equal(strip(to!S("\u2008 foo\t \u2007"), "\u2008\u2007\t "),
"foo"));
assert(equal(strip(to!S("\u0085 μ \u0085 \u00BB \r"), "\u0085\r "),
"μ \u0085 \u00BB"));
assert(equal(strip(to!S("\U0010FFFE"), " "), "\U0010FFFE"));
assert(equal(strip(to!S(""), " "), ""));

assert(equal(strip(to!S(" \nfoo\t "), "\n ", "\t "), "foo"));
assert(equal(strip(to!S("\u2008\n foo\t \u2007"),
"\u2008\n ", "\u2007\t "), "foo"));
assert(equal(strip(to!S("\u0085 μ \u0085 \u00BB μ \u00BB\r"),
"\u0085 ", "\u00BB\r "), "μ \u0085 \u00BB μ"));
assert(equal(strip(to!S("\U0010FFFE"), " ", " "), "\U0010FFFE"));
assert(equal(strip(to!S(""), " ", " "), ""));
}
});
}

@safe pure unittest
{
import std.array : sameHead, sameTail;
import std.exception : assertCTFEable;
assertCTFEable!(
{
wstring s = " xyz ";
assert(s.sameTail(s.stripLeft(" ")));
assert(s.sameHead(s.stripRight(" ")));
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nitpick: please indent the body of the function literal here.

});
}


/++
If $(D str) ends with $(D delimiter), then $(D str) is returned without
Expand Down