Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
213 changes: 212 additions & 1 deletion std/string.d
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ module std.string;

import core.exception : onRangeError;
import core.vararg, core.stdc.stdlib, core.stdc.string,
std.ascii, std.conv, std.exception, std.format, std.functional,
std.algorithm, std.ascii, std.conv, std.exception, std.format, std.functional,
std.metastrings, std.range, std.regex, std.traits,
std.typetuple, std.uni, std.utf;

Expand Down Expand Up @@ -3833,6 +3833,217 @@ unittest
assert(wrap("u u") == "u u\n");
}

/******************************************
* Removes indentation from a multi-line string or an array of single-line strings.
*
* This uniformly outdents the text as much as possible.
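* Whitespace-only lines are converted to blank lines (their line terminators
* are kept). If some non-blank line has no indentation at all, nothing can be
* outdented and the input is returned as soon as that line is reached, so
* whitespace-only lines after it are left unchanged.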
*
* A StringException will be thrown if inconsistent indentation prevents
* the input from being outdented.
*
* Works at compile-time.
*
* Example:
* ---
* writeln(q{
*     import std.stdio;
*     void main() {
*         writeln("Hello");
*     }
* }.outdent());
* ---
*
* Output:
* ---
*
* import std.stdio;
* void main() {
*     writeln("Hello");
* }
*
* ---
*
*/

S outdent(S)(S str) if(isSomeString!S)
{
    // Split with terminators kept so that re-joining the outdented lines
    // reproduces every original newline sequence unchanged.
    auto pieces = splitLines(str, KeepTerminator.yes);

    // Delegate the actual work to the array-of-lines overload.
    auto shifted = outdent(pieces);

    return join(shifted);
}

/// ditto
S[] outdent(S)(S[] lines) if(isSomeString!S)
{
// Two passes over `lines`:
//  1. blank out whitespace-only lines (keeping their terminator) and find
//     the shortest leading-whitespace run among the other lines;
//  2. remove that run from the front of every non-blank line, failing if a
//     line does not start with it.
// The elements of `lines` are reassigned in place and `lines` itself is
// returned; an empty array yields null.
if (lines.empty)
{
return null;
}

// Returns the leading whitespace of str as a slice of str: everything before
// the first character for which std.uni.isWhite is false.
static S leadingWhiteOf(S str)
{
return str[ 0 .. $-find!(not!(std.uni.isWhite))(str).length ];
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this what std.algorithm.until is for?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using until would be possible but a bit difficult here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yes, because until is lazy. I forgot about that.

}

S shortestIndent;
foreach (i, line; lines)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could just do foreach(ref line; lines) and skip the index.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed. I think the indicies are holdovers from old versions.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I remember why I did that: If I just use foreach(ref line; lines), then it breaks under CTFE.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah. Okay. Hopefully that restriction gets fixed at some point.

{
// During compile-time evaluation (__ctfe) the local ctfe_strip helper is
// used in place of strip().
auto stripped = __ctfe? line.ctfe_strip() : line.strip();

if (stripped.empty)
{
// Whitespace-only line: keep only the tail that chomp() would remove
// (the line terminator, if any) and drop everything before it.
lines[i] = line[line.chomp().length..$];
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, let me get this straight. If there is nothing by whitespace on this line, you're going to assign the line to itself?

stripped will be empty only if there's only whitespace on the line, in which case, calling chomp on the line will result in an empty string, whose length is then 0, so, you'll be doing lines[i] = line[0 .. $] when lines[i] is the same as line, so you'll effectively be doing lines[i] = lines[i].

So, unless I'm really misunderstanding something here, this statement does nothing but eat up extra CPU cycles.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

chomp != strip

According to the docs, chomp simply removes the trailing newline (whichever of the five newline styles it may be - seriously, did unicode really need to make a bad situation worse?).

So no, I'm assigning the line's newline to "line[i]". Ie, It's like doing "lines[i] = null", except it preserves the newline (so that calling outdent doesn't go messing with your newlines).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right. I obviously read the docs too quickly.

}
else
{
auto indent = leadingWhiteOf(line);

// Comparing number of code units instead of code points is OK here
// because this function throws upon inconsistent indentation.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wait. Are you assuming that all whitespace is only one code unit in both UTF-8 and UTF-16? If so, that's an incorrect assumption with unicode.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's actually fine because any inconsistency will be revealed by the startsWith test below.

Consider e.g. that one string starts with some ASCII spaces and another starts with some Unicode spaces. The algorithm may make the wrong choice of which has minimal length, but that's not important because the indentation is wrong anyway and the function will throw.

if (shortestIndent is null || indent.length < shortestIndent.length)
{
// A non-blank line without any indentation means nothing can be removed:
// return right away. Lines after this one have not been visited, so
// whitespace-only lines among them are left as they were.
if (indent.empty) return lines;
shortestIndent = indent;
}
}
}

// Second pass: strip the common indentation found above.
foreach (i; 0..lines.length)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't it be better to just do foreach(ref line; lines)?

{
auto stripped = __ctfe? lines[i].ctfe_strip() : lines[i].strip();
if (stripped.empty)
{
// Do nothing
}
else if (lines[i].startsWith(shortestIndent))
{
lines[i] = lines[i][shortestIndent.length..$];
}
else
{
// The line's leading text differs from the shortest indentation found.
// At compile time this fails through assert(false); at run time a
// StringException is thrown.
if (__ctfe) assert(false, "outdent: Inconsistent indentation");
else throw new StringException("outdent: Inconsistent indentation");
}
}

return lines;
}

// TODO: Remove this and use std.string.strip when retro() becomes ctfe-able.
private S ctfe_strip(S)(S str) if(isSomeString!(Unqual!S))
{
    // Leading whitespace goes through the regular stripLeft; trailing
    // whitespace goes through the local ctfe_stripRight helper.
    auto withoutLeading = stripLeft(str);
    return ctfe_stripRight(withoutLeading);
}

// TODO: Remove this and use std.string.strip when retro() becomes ctfe-able.
private S ctfe_stripRight(S)(S str) if(isSomeString!(Unqual!S))
{
    // Length of the slice to return; stays 0 when str is empty or consists
    // solely of whitespace.
    size_t keepLength = 0;

    // Index one past the character currently being examined.
    size_t charEnd = str.length;

    // Walk the decoded characters from the back; `start` is the index at
    // which the current character begins.
    foreach_reverse (start, dchar c; str)
    {
        if (std.uni.isWhite(c))
        {
            // Still inside the trailing whitespace: move the cut point left.
            charEnd = start;
            continue;
        }

        // First non-whitespace character from the right: keep through its end.
        keepLength = charEnd;
        break;
    }

    return str[0..keepLength];
}

// Fixture strings for outdent, compiled only in unittest builds. Each template
// yields the same literal typed as the string type S.
// NOTE(review): the whitespace inside these literals is significant and may
// have been altered by page extraction; the unittest visible below defines its
// own testStr3/expected3 and does not reference these templates - confirm
// whether they are still needed.
version(unittest)
{
// Input: lines with differing amounts of leading whitespace, including
// whitespace-only lines and a line containing a non-BMP character (U+10143).
template outdent_testStr(S)
{
enum S outdent_testStr =
"
\t\tX
\t\U00010143X
\t\t

\t\t\tX
\t ";
}

// Expected result of outdenting outdent_testStr.
template outdent_expected(S)
{
enum S outdent_expected =
"
\tX
\U00010143X


\t\tX
";
}
}

// Tests for ctfe_strip and outdent. Every outdent case is checked twice: once
// at run time (assert) and once at compile time (static assert).
// NOTE(review): whitespace inside the string literals is significant and may
// have been altered by page extraction - verify against the original file.
unittest
{
debug(string) printf("string.outdent.unittest\n");

// ctfe_strip evaluated at compile time: mixed leading/trailing whitespace,
// nothing to strip, whitespace-only input, and empty input.
// NOTE(review): `&copy;` looks like an HTML-escaped character produced by page
// extraction; it appears on both sides of the comparison - confirm.
static assert(ctfe_strip(" \tHi \r\n") == "Hi");
static assert(ctfe_strip(" \tHi&copy;\u2028 \r\n") == "Hi&copy;");
static assert(ctfe_strip("Hi") == "Hi");
static assert(ctfe_strip(" \t \r\n") == "");
static assert(ctfe_strip("") == "");

// Repeat the outdent cases for each of the three string types.
foreach (S; TypeTuple!(string, wstring, dstring))
{
// Empty input stays empty.
enum S blank = "";
assert(blank.outdent() == blank);
static assert(blank.outdent() == blank);

// Only whitespace-only lines: each is reduced to its terminator.
enum S testStr1 = " \n \t\n ";
enum S expected1 = "\n\n";
assert(testStr1.outdent() == expected1);
static assert(testStr1.outdent() == expected1);

// Same input with its last code unit removed.
assert(testStr1[0..$-1].outdent() == expected1);
static assert(testStr1[0..$-1].outdent() == expected1);

// First line has no indentation: the input is returned unchanged, including
// the whitespace-only middle line.
enum S testStr2 = "a\n \t\nb";
assert(testStr2.outdent() == testStr2);
static assert(testStr2.outdent() == testStr2);

// Multi-line input with tabs, whitespace-only lines and a non-BMP character
// (U+10143).
enum S testStr3 =
"
\t\tX
\t\U00010143X
\t\t

\t\t\tX
\t ";

enum S expected3 =
"
\tX
\U00010143X


\t\tX
";
assert(testStr3.outdent() == expected3);
static assert(testStr3.outdent() == expected3);

// One line per terminator style (\r, \n, \r\n, \u2028, \u2029); the
// terminators must come through unchanged. These enums are declared without
// S, so their type is inferred from the literal.
enum testStr4 = " X\r X\n X\r\n X\u2028 X\u2029 X";
enum expected4 = "X\rX\nX\r\nX\u2028X\u2029X";
assert(testStr4.outdent() == expected4);
static assert(testStr4.outdent() == expected4);

// Same as case 4 with the final code unit dropped from input and expectation.
enum testStr5 = testStr4[0..$-1];
enum expected5 = expected4[0..$-1];
assert(testStr5.outdent() == expected5);
static assert(testStr5.outdent() == expected5);

// Whitespace-only lines with each terminator style: only the terminators
// remain.
enum testStr6 = " \r \n \r\n \u2028 \u2029";
enum expected6 = "\r\n\r\n\u2028\u2029";
assert(testStr6.outdent() == expected6);
static assert(testStr6.outdent() == expected6);
}
}

private template softDeprec(string vers, string date, string oldFunc, string newFunc)
{
Expand Down