-
Notifications
You must be signed in to change notification settings - Fork 2
Open
Description
I could be misunderstanding Unicode (or the intent of this library), but it seems like incorrect results are produced for some characters, for example:
// Test input: three encodings of "A with ring above" back to back —
// U+0041 followed by combining U+030A, then U+212B (ANGSTROM SIGN), then precomposed U+00C5.
var str = ('\u0041\u030A\u212B\u00C5');
/**
 * Logs the UTF-16 code units of a string as lowercase hexadecimal numbers
 * joined by ' + ' (for example "41 + 30a").
 *
 * @param {string} str - Text whose code units should be printed.
 */
var show = function(str) {
  // Converts a single-unit string into the hex form of its code unit.
  var toHex = function(unit) {
    return unit.charCodeAt(0).toString(16);
  };
  // Borrow Array's map so the string is walked one code unit at a time.
  var hexUnits = Array.prototype.map.call(str, toHex);
  console.log(hexUnits.join(' + '));
};
// Short alias for the library namespace being exercised; `net` is not defined
// in this snippet and must already exist in the surrounding environment.
var uc = net.kornr.unicode;
// NOTE(review): each actual/expected/comment group below appears to describe
// the show(...) call that immediately follows it.
// actual: 61 + 30a + 61 + 30a + 61 + 30a
// expected: 61 + 30a + 61 + 30a + 61 + 30a
// comment: all okay
show(uc.lowercase(str));
// actual: 61 + 30a + 61 + 61
// expected: 61 + 61 + 61
// comment: first does not have diacritical mark removed, second okay, third okay
show(uc.lowercase_nomark(str));
// actual: 41 + 30a + c5 + 41 + 30a
// expected: 41 + 30a + 41 + 30a + 41 + 30a
// comment: first okay (already NFD and uppercase), second not decomposed to NFD, third okay
show(uc.uppercase(str));
// actual: 41 + 30a + c5 + 41
// expected: 41 + 41 + 41
// comment: first does not have diacritical mark removed, second not decomposed to NFD, third okay
show(uc.uppercase_nomark(str));
Metadata
Assignees
Labels
No labels