*: change default charset and collation from 'utf8 utf8_bin' to 'utf8mb4 utf8mb4_bin' #7965
Conversation
| tp.Decimal = UnspecifiedLength | ||
| tp.Charset = mysql.DefaultCharset | ||
| tp.Collate = mysql.DefaultCollationName | ||
| tp.Charset = charset.CharsetUTF8MB4 |
There was a problem hiding this comment.
Can we use GetDefaultCharsetAndCollate?
There was a problem hiding this comment.
This change makes sense to me for this PR. My PR will change it back though :)
|
@shenli PTAL |
|
The CI will be fixed after pingcap/parser#13 is merged. |
|
/run-all-tests |
|
/run-all-tests -tidb-test=pr/646 |
|
/run-integration-ddl-test -tidb-test=pr/646 |
|
/run-sqllogic-test -tidb-test=pr/646 |
|
/run-integration-ddl-test -tidb-test=pr/646 |
| a.RetTp.Charset = charset.CharsetUTF8 | ||
| a.RetTp.Collate = charset.CollationUTF8 | ||
| a.RetTp.Charset = charset.CharsetUTF8MB4 | ||
| a.RetTp.Collate = charset.CollationUTF8MB4 |
There was a problem hiding this comment.
s/charset.CharsetUTF8MB4/mysql.DefaultCharset/
s/charset.CollationUTF8MB4/mysql.DefaultCollationName/
There was a problem hiding this comment.
or: charset.GetDefaultCharsetAndCollate()
| fieldType.Charset, fieldType.Collate = charset.CharsetBin, charset.CollationBin | ||
| } else { | ||
| fieldType.Charset, fieldType.Collate = charset.CharsetUTF8, charset.CharsetUTF8 | ||
| fieldType.Charset, fieldType.Collate = mysql.DefaultCharset, mysql.DefaultCollationName |
There was a problem hiding this comment.
how about: charset.GetDefaultCharsetAndCollate()
| } | ||
| if len(b.tp.Charset) <= 0 { | ||
| b.tp.Charset, b.tp.Collate = charset.CharsetUTF8, charset.CollationUTF8 | ||
| b.tp.Charset, b.tp.Collate = mysql.DefaultCharset, mysql.DefaultCollationName |
| Charset: charset.CharsetUTF8, | ||
| Collate: charset.CollationUTF8, | ||
| Charset: charset.CharsetUTF8MB4, | ||
| Collate: charset.CollationUTF8MB4, |
| } | ||
| tp := types.NewFieldType(mysql.TypeVarString) | ||
| tp.Charset, tp.Collate = charset.CharsetUTF8, charset.CollationUTF8 | ||
| tp.Charset, tp.Collate = charset.CharsetUTF8MB4, charset.CollationUTF8MB4 |
| Decimal: 0, | ||
| Charset: charset.CharsetUTF8, | ||
| Collate: charset.CollationUTF8, | ||
| Charset: mysql.DefaultCharset, |
There was a problem hiding this comment.
Why not charset.CharsetUTF8MB4?
| Charset: charset.CharsetUTF8, | ||
| Collate: charset.CollationUTF8, | ||
| Charset: mysql.DefaultCharset, | ||
| Collate: mysql.DefaultCollationName, |
There was a problem hiding this comment.
why not charset.CollationUTF8MB4
| fieldType.Charset, fieldType.Collate = charset.CharsetBin, charset.CollationBin | ||
| } else { | ||
| fieldType.Charset, fieldType.Collate = charset.CharsetUTF8, charset.CharsetUTF8 | ||
| fieldType.Charset, fieldType.Collate = mysql.DefaultCharset, mysql.DefaultCollationName |
| } | ||
| if len(b.tp.Charset) <= 0 { | ||
| b.tp.Charset, b.tp.Collate = charset.CharsetUTF8, charset.CollationUTF8 | ||
| b.tp.Charset, b.tp.Collate = mysql.DefaultCharset, mysql.DefaultCollationName |
| types.SetBinChsClnFlag(resultFieldType) | ||
| } else { | ||
| resultFieldType.Charset, resultFieldType.Collate, resultFieldType.Flag = charset.CharsetUTF8, charset.CollationUTF8, 0 | ||
| resultFieldType.Charset, resultFieldType.Collate, resultFieldType.Flag = mysql.DefaultCharset, mysql.DefaultCollationName, 0 |
| fieldTp.Decimal, fieldTp.Flen = decimal, flen | ||
| if fieldTp.EvalType().IsStringKind() && !isBinaryStr { | ||
| fieldTp.Charset, fieldTp.Collate = mysql.DefaultCharset, mysql.DefaultCollationName | ||
| fieldTp.Charset, fieldTp.Collate = charset.CharsetUTF8MB4, charset.CollationUTF8MB4 |
There was a problem hiding this comment.
/cc @XuHuaiyu , use mysql.DefaultCharset or charset.CharsetUTF8MB4?
There was a problem hiding this comment.
Will mysql.DefaultCharset change? If so, we should use utf8mb4.
There was a problem hiding this comment.
@XuHuaiyu, DefaultCharset is better. It hides the implementation details; if we change the default charset again, the code modification can be minimized.
There was a problem hiding this comment.
No, this place should be utf8mb4. If we did not set the charset, it can be the default charset, but here it should definitely be utf8mb4.
|
@zz-jason @crazycs520 PTAL |
| charsetName := tb.Meta().Charset | ||
| if len(charsetName) == 0 { | ||
| charsetName = charset.CharsetUTF8 | ||
| charsetName = charset.CharsetUTF8MB4 |
There was a problem hiding this comment.
s/charset.CharsetUTF8MB4/mysql.DefaultCharset/
| } | ||
| if types.IsNonBinaryStr(lhs) && !types.IsBinaryStr(rhs) { | ||
| resultFieldType.Charset, resultFieldType.Collate, resultFieldType.Flag = charset.CharsetUTF8, charset.CollationUTF8, 0 | ||
| resultFieldType.Charset, resultFieldType.Collate, resultFieldType.Flag = charset.CharsetUTF8MB4, charset.CollationUTF8MB4, 0 |
There was a problem hiding this comment.
use mysql.DefaultCharset, mysql.DefaultCollationName instead.
| cs = mysql.DefaultCharset | ||
| cl = mysql.DefaultCollationName | ||
| cs = charset.CharsetUTF8MB4 | ||
| cl = charset.CollationUTF8MB4 |
There was a problem hiding this comment.
cs, cl = charset.GetDefaultCharsetAndCollate()
| mCharset = mysql.DefaultCharset | ||
| mCollation = mysql.DefaultCollationName | ||
| mCharset = charset.CharsetUTF8MB4 | ||
| mCollation = charset.CollationUTF8MB4 |
There was a problem hiding this comment.
charset.GetDefaultCharsetAndCollate()
There was a problem hiding this comment.
This should definitely be utf8mb4.
| var err error | ||
| if cfg.Socket != "" { | ||
| if s.listener, err = net.Listen("unix", cfg.Socket); err == nil { | ||
| // job.SnapshotVer == 0 means |
What problem does this PR solve?
fix #7920.
Change TiDB's default charset and collation to "utf8mb4 utf8mb4_bin". TiDB actually treats all data as utf8mb4, but the previous default charset was "utf8". Inserting a 4-byte Unicode string into TiDB succeeds, but if we use mysqldump to restore the data back into MySQL, the charset will be utf8 and MySQL will report an error:
ERROR 1366 (HY000): Incorrect string value: '\xF0\xA4\x8B\xAE' for column 'v' at row 1.
How does it work?
- mysql.DefaultCharset: from UTF8Charset to UTF8MB4Charset.
- mysql.DefaultCollationName: from UTF8DefaultCollation to UTF8MB4DefaultCollation.
- charset.CharsetUTF8 and CollationUTF8: modify them to charset.CharsetUTF8MB4 or mysql.DefaultCharset.
- Then fix the corresponding test cases.
Check List
Tests
Code changes
Related changes