-
-
Notifications
You must be signed in to change notification settings - Fork 752
Add std.string.outdent (try #3) #282
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -60,7 +60,7 @@ module std.string; | |
|
|
||
| import core.exception : onRangeError; | ||
| import core.vararg, core.stdc.stdlib, core.stdc.string, | ||
| std.ascii, std.conv, std.exception, std.format, std.functional, | ||
| std.algorithm, std.ascii, std.conv, std.exception, std.format, std.functional, | ||
| std.metastrings, std.range, std.regex, std.traits, | ||
| std.typetuple, std.uni, std.utf; | ||
|
|
||
|
|
@@ -3833,6 +3833,217 @@ unittest | |
| assert(wrap("u u") == "u u\n"); | ||
| } | ||
|
|
||
| /****************************************** | ||
| * Removes indentation from a multi-line string or an array of single-line strings. | ||
| * | ||
| * This uniformly outdents the text as much as possible. | ||
| * Whitespace-only lines are always converted to blank lines. | ||
| * | ||
| * A StringException will be thrown if inconsistent indentation prevents | ||
| * the input from being outdented. | ||
| * | ||
| * Works at compile-time. | ||
| * | ||
| * Example: | ||
| * --- | ||
| * writeln(q{ | ||
| * import std.stdio; | ||
| * void main() { | ||
| * writeln("Hello"); | ||
| * } | ||
| * }.outdent()); | ||
| * --- | ||
| * | ||
| * Output: | ||
| * --- | ||
| * | ||
| * import std.stdio; | ||
| * void main() { | ||
| * writeln("Hello"); | ||
| * } | ||
| * | ||
| * --- | ||
| * | ||
| */ | ||
|
|
||
| S outdent(S)(S str) if(isSomeString!S) | ||
| { | ||
| return str.splitLines(KeepTerminator.yes).outdent().join(); | ||
| } | ||
|
|
||
| /// ditto | ||
| S[] outdent(S)(S[] lines) if(isSomeString!S) | ||
| { | ||
| if (lines.empty) | ||
| { | ||
| return null; | ||
| } | ||
|
|
||
| static S leadingWhiteOf(S str) | ||
| { | ||
| return str[ 0 .. $-find!(not!(std.uni.isWhite))(str).length ]; | ||
| } | ||
|
|
||
| S shortestIndent; | ||
| foreach (i, line; lines) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You could just do
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Indeed. I think the indices are holdovers from old versions.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I remember why I did that: If I just use
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah. Okay. Hopefully that restriction gets fixed at some point. |
||
| { | ||
| auto stripped = __ctfe? line.ctfe_strip() : line.strip(); | ||
|
|
||
| if (stripped.empty) | ||
| { | ||
| lines[i] = line[line.chomp().length..$]; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So, let me get this straight. If there is nothing but whitespace on this line, you're going to assign the line to itself?
So, unless I'm really misunderstanding something here, this statement does nothing but eat up extra CPU cycles.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. chomp != strip. According to the docs, chomp simply removes the trailing newline (whichever of the five newline styles it may be - seriously, did Unicode really need to make a bad situation worse?). So no, I'm assigning the line's newline to "lines[i]". I.e., it's like doing "lines[i] = null", except it preserves the newline (so that calling outdent doesn't go messing with your newlines).
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You're right. I obviously read the docs too quickly. |
||
| } | ||
| else | ||
| { | ||
| auto indent = leadingWhiteOf(line); | ||
|
|
||
| // Comparing number of code units instead of code points is OK here | ||
| // because this function throws upon inconsistent indentation. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wait. Are you assuming that all whitespace is only one code unit in both UTF-8 and UTF-16? If so, that's an incorrect assumption with unicode.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's actually fine because any inconsistency will be revealed by the startsWith test below. Consider e.g. that one string starts with some ASCII spaces and another starts with some Unicode spaces. The algorithm may make the wrong choice of which has minimal length, but that's not important because the indentation is wrong anyway and the function will throw. |
||
| if (shortestIndent is null || indent.length < shortestIndent.length) | ||
| { | ||
| if (indent.empty) return lines; | ||
| shortestIndent = indent; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| foreach (i; 0..lines.length) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wouldn't it be better to just do |
||
| { | ||
| auto stripped = __ctfe? lines[i].ctfe_strip() : lines[i].strip(); | ||
| if (stripped.empty) | ||
| { | ||
| // Do nothing | ||
| } | ||
| else if (lines[i].startsWith(shortestIndent)) | ||
| { | ||
| lines[i] = lines[i][shortestIndent.length..$]; | ||
| } | ||
| else | ||
| { | ||
| if (__ctfe) assert(false, "outdent: Inconsistent indentation"); | ||
| else throw new StringException("outdent: Inconsistent indentation"); | ||
| } | ||
| } | ||
|
|
||
| return lines; | ||
| } | ||
|
|
||
| // TODO: Remove this and use std.string.strip when retro() becomes ctfe-able. | ||
| private S ctfe_strip(S)(S str) if(isSomeString!(Unqual!S)) | ||
| { | ||
| return str.stripLeft().ctfe_stripRight(); | ||
| } | ||
|
|
||
| // TODO: Remove this and use std.string.strip when retro() becomes ctfe-able. | ||
| private S ctfe_stripRight(S)(S str) if(isSomeString!(Unqual!S)) | ||
| { | ||
| size_t endIndex = 0; | ||
| size_t prevIndex = str.length; | ||
|
|
||
| foreach_reverse (i, dchar ch; str) | ||
| { | ||
| if (!std.uni.isWhite(ch)) | ||
| { | ||
| endIndex = prevIndex; | ||
| break; | ||
| } | ||
| prevIndex = i; | ||
| } | ||
|
|
||
| return str[0..endIndex]; | ||
| } | ||
|
|
||
| version(unittest) | ||
| { | ||
| template outdent_testStr(S) | ||
| { | ||
| enum S outdent_testStr = | ||
| " | ||
| \t\tX | ||
| \t\U00010143X | ||
| \t\t | ||
|
|
||
| \t\t\tX | ||
| \t "; | ||
| } | ||
|
|
||
| template outdent_expected(S) | ||
| { | ||
| enum S outdent_expected = | ||
| " | ||
| \tX | ||
| \U00010143X | ||
|
|
||
|
|
||
| \t\tX | ||
| "; | ||
| } | ||
| } | ||
|
|
||
| unittest | ||
| { | ||
| debug(string) printf("string.outdent.unittest\n"); | ||
|
|
||
| static assert(ctfe_strip(" \tHi \r\n") == "Hi"); | ||
| static assert(ctfe_strip(" \tHi©\u2028 \r\n") == "Hi©"); | ||
| static assert(ctfe_strip("Hi") == "Hi"); | ||
| static assert(ctfe_strip(" \t \r\n") == ""); | ||
| static assert(ctfe_strip("") == ""); | ||
|
|
||
| foreach (S; TypeTuple!(string, wstring, dstring)) | ||
| { | ||
| enum S blank = ""; | ||
| assert(blank.outdent() == blank); | ||
| static assert(blank.outdent() == blank); | ||
|
|
||
| enum S testStr1 = " \n \t\n "; | ||
| enum S expected1 = "\n\n"; | ||
| assert(testStr1.outdent() == expected1); | ||
| static assert(testStr1.outdent() == expected1); | ||
|
|
||
| assert(testStr1[0..$-1].outdent() == expected1); | ||
| static assert(testStr1[0..$-1].outdent() == expected1); | ||
|
|
||
| enum S testStr2 = "a\n \t\nb"; | ||
| assert(testStr2.outdent() == testStr2); | ||
| static assert(testStr2.outdent() == testStr2); | ||
|
|
||
| enum S testStr3 = | ||
| " | ||
| \t\tX | ||
| \t\U00010143X | ||
| \t\t | ||
|
|
||
| \t\t\tX | ||
| \t "; | ||
|
|
||
| enum S expected3 = | ||
| " | ||
| \tX | ||
| \U00010143X | ||
|
|
||
|
|
||
| \t\tX | ||
| "; | ||
| assert(testStr3.outdent() == expected3); | ||
| static assert(testStr3.outdent() == expected3); | ||
|
|
||
| enum testStr4 = " X\r X\n X\r\n X\u2028 X\u2029 X"; | ||
| enum expected4 = "X\rX\nX\r\nX\u2028X\u2029X"; | ||
| assert(testStr4.outdent() == expected4); | ||
| static assert(testStr4.outdent() == expected4); | ||
|
|
||
| enum testStr5 = testStr4[0..$-1]; | ||
| enum expected5 = expected4[0..$-1]; | ||
| assert(testStr5.outdent() == expected5); | ||
| static assert(testStr5.outdent() == expected5); | ||
|
|
||
| enum testStr6 = " \r \n \r\n \u2028 \u2029"; | ||
| enum expected6 = "\r\n\r\n\u2028\u2029"; | ||
| assert(testStr6.outdent() == expected6); | ||
| static assert(testStr6.outdent() == expected6); | ||
| } | ||
| } | ||
|
|
||
| private template softDeprec(string vers, string date, string oldFunc, string newFunc) | ||
| { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Isn't this what `std.algorithm.until` is for?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using `until` would be possible but a bit difficult here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah yes, because `until` is lazy. I forgot about that.