Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 30 additions & 8 deletions std/uni.d
Original file line number Diff line number Diff line change
Expand Up @@ -6930,8 +6930,8 @@ unittest
than $(LREF icmp). However keep in mind the warning below.)

Params:
str1 = a string or a $(D ForwardRange) of $(D dchar)s
str2 = a string or a $(D ForwardRange) of $(D dchar)s
str1 = a string
str2 = a string

Returns:
An $(D int) that is 0 if the strings match,
Expand All @@ -6947,17 +6947,16 @@ unittest
$(LREF icmp)
$(XREF_PACK algorithm,comparison,cmp)
+/
int sicmp(S1, S2)(S1 str1, S2 str2)
if (isForwardRange!S1 && is(Unqual!(ElementType!S1) == dchar)
&& isForwardRange!S2 && is(Unqual!(ElementType!S2) == dchar))
int sicmp(S1, S2)(S1 str1, S2 str2) if (isSomeString!S1 && isSomeString!S2)
{
alias sTable = simpleCaseTable;
import std.utf : decode;

size_t ridx=0;
foreach (dchar lhs; str1)
{
if (ridx == str2.length)
return 1;
import std.utf : decode;
dchar rhs = decode(str2, ridx);
int diff = lhs - rhs;
if (!diff)
Expand Down Expand Up @@ -6991,7 +6990,8 @@ int sicmp(S1, S2)(S1 str1, S2 str2)
}

///
unittest{
unittest
{
assert(sicmp("Август", "авгусТ") == 0);
// Greek also works as long as there is no 1:M mapping in sight
assert(sicmp("ΌΎ", "όύ") == 0);
Expand Down Expand Up @@ -7060,6 +7060,15 @@ private int fullCasedCmp(Range)(dchar lhs, dchar rhs, ref Range rtail)
The cost of $(D icmp) being pedantically correct is
slightly worse performance.
)

Returns:
An $(D int) that is 0 if the strings match,
<0 if $(D str1) is lexicographically "less" than $(D str2),
>0 if $(D str1) is lexicographically "greater" than $(D str2)

See_Also:
$(LREF sicmp)
$(XREF_PACK algorithm,comparison,cmp)
+/
int icmp(S1, S2)(S1 str1, S2 str2)
if (isForwardRange!S1 && is(Unqual!(ElementType!S1) == dchar)
Expand Down Expand Up @@ -7094,11 +7103,24 @@ int icmp(S1, S2)(S1 str1, S2 str2)
}

///
unittest{
unittest
{
assert(icmp("Rußland", "Russland") == 0);
assert(icmp("ᾩ -> \u1F70\u03B9", "\u1F61\u03B9 -> ᾲ") == 0);
}

/**
* By using $(XREF utf, byUTF) and its aliases, the GC allocations and thrown
* exceptions caused by auto-decoding can be avoided, making `icmp` `@safe @nogc nothrow pure`.
*/
@safe @nogc nothrow pure unittest
{
import std.utf : byDchar;

assert(icmp("Rußland".byDchar, "Russland".byDchar) == 0);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wait what???
In German (my mother tongue) this is not guaranteed to be correct. From Wikipedia:

Thus it helps to distinguish words like Buße (long vowel) 'penance, fine' and Busse (short vowel) 'buses'.

https://en.m.wikipedia.org/wiki/%C3%9F

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a caseless comparison and follows the rules of Unicode.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a caseless comparison and follows the rules of Unicode.

Oh sorry. Overlooked from my phone :/

assert(icmp("ᾩ -> \u1F70\u03B9".byDchar, "\u1F61\u03B9 -> ᾲ".byDchar) == 0);
}

// overloads for the most common cases to reduce compile time
@safe pure /*TODO nothrow*/
{
Expand Down