From fab2b15ef90ca66a1f7cf8a6118e73ae045a57bf Mon Sep 17 00:00:00 2001 From: Martin Beukman Date: Wed, 4 Oct 2023 16:32:16 +0200 Subject: [PATCH] add fallback for unsupported encodings --- go.mod | 6 +-- go.sum | 4 ++ parsemail.go | 106 +++++++++++++++++++++++++++++++++++++++- parsemail_on2it_test.go | 77 +++++++++++++++++++++++++++++ 4 files changed, 188 insertions(+), 5 deletions(-) diff --git a/go.mod b/go.mod index 429eaa4..86f8410 100644 --- a/go.mod +++ b/go.mod @@ -1,8 +1,8 @@ module github.com/on2itsecurity/parsemail -go 1.12 +go 1.21 require ( - golang.org/x/net v0.0.0-20200927032502-5d4f70055728 - golang.org/x/text v0.3.0 + golang.org/x/net v0.15.0 + golang.org/x/text v0.13.0 ) diff --git a/go.sum b/go.sum index db323ef..4d76308 100644 --- a/go.sum +++ b/go.sum @@ -3,8 +3,12 @@ golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20200927032502-5d4f70055728 h1:5wtQIAulKU5AbLQOkjxl32UufnIOqgBX72pS0AV14H0= golang.org/x/net v0.0.0-20200927032502-5d4f70055728/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.15.0 h1:ugBLEUaxABaB5AJqW9enI0ACdci2RUd4eP51NTBvuJ8= +golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= +golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= diff --git a/parsemail.go b/parsemail.go index fdd8d7e..e5705b8 100644 --- a/parsemail.go +++ b/parsemail.go @@ -335,6 +335,8 @@ func decodeMimeSentence(s string) string { ss := strings.Split(s, " ") for _, word := range ss { + word = removeUnsupportedEncoding(word) + w, err := mimeWordDecoder.Decode(word) if err != nil { if len(result) == 0 { @@ -350,6 +352,100 @@ func decodeMimeSentence(s string) string { return strings.Join(result, "") } +func removeUnsupportedEncodingForAddress(s string) string { + if s == "" { + return s + } + + ss := strings.Split(s, " ") + result := []string{} + + for _, word := range ss { + validWord := word + + if !(strings.HasPrefix(word, "=?") && strings.HasSuffix(word, "?=")) { + result = append(result, validWord) + + continue + } + + word = word[2 : len(word)-2] + + // split word "UTF-8?q?text" into "UTF-8", 'q', and "text" + charset, text, _ := strings.Cut(word, "?") + if charset == "" { + validWord = `"(removed text: non supported charset)"` + } + + encoding, _, _ := strings.Cut(text, "?") + if len(encoding) != 1 { + validWord = `"(removed text: non supported encoding)"` + } + + if charset != "" { + encoder, _ := ianaindex.MIME.Encoding(charset) + + if encoder == nil { + validWord = `"(removed text: non supported encoder)"` + } + } + + result = append(result, validWord) + } + + return strings.Join(result, " ") +} + +func removeUnsupportedEncodingForAddressList(s string) string { + if s == "" { + return s + } + + addresses := s + result := []string{} + + for _, address := range strings.Split(addresses, ",") { + result = append(result, removeUnsupportedEncodingForAddress(address)) + } + + return strings.Join(result, ",") +} + +func removeUnsupportedEncoding(s string) string { + if s == "" { + return s + } + + word := s + + if !(strings.HasPrefix(word, "=?") && strings.HasSuffix(word, "?=")) { + return word + } + + word = word[2 : len(word)-2] + + // split word "UTF-8?q?text" into "UTF-8", 'q', and "text" + charset, text, _ := strings.Cut(word, "?") + if charset == "" { + return "(removed text: non supported charset)" + } + + encoding, _, _ := strings.Cut(text, "?") + if len(encoding) != 1 { + return "(removed text: non supported encoding)" + } + + if charset != "" { + encoder, _ := ianaindex.MIME.Encoding(charset) + + if encoder == nil { + return "(removed text: non supported encoder)" + } + } + + return s +} + func decodeHeaderMime(header mail.Header) (mail.Header, error) { parsedHeader := map[string][]string{} @@ -503,9 +599,15 @@ var mimeWordDecoder = &mime.WordDecoder{ if err != nil { return nil, err } + + if enc == nil { + return nil, fmt.Errorf("invalid encoding for charset %s", charset) + } + return transform.NewReader(input, enc.NewDecoder()), nil }, } + var addressParser = mail.AddressParser{ WordDecoder: mimeWordDecoder, } @@ -516,7 +618,7 @@ func (hp headerParser) parseAddress(s string) (ma *mail.Address) { } if strings.Trim(s, " \n") != "" { - ma, hp.err = addressParser.Parse(s) + ma, hp.err = addressParser.Parse(removeUnsupportedEncodingForAddress(s)) return ma } @@ -530,7 +632,7 @@ func (hp headerParser) parseAddressList(s string) (ma []*mail.Address) { } if strings.Trim(s, " \n") != "" { - ma, hp.err = addressParser.ParseList(s) + ma, hp.err = addressParser.ParseList(removeUnsupportedEncodingForAddressList(s)) return } diff --git a/parsemail_on2it_test.go b/parsemail_on2it_test.go index bdc4751..b0e7928 100644 --- a/parsemail_on2it_test.go +++ b/parsemail_on2it_test.go @@ -63,6 +63,20 @@ func Test_decodeMimeSentence(t *testing.T) { }, `John Do€`, }, + { + "utf-7", + args{ + `=?utf-7?B?Sm9obiBEbytJS3ct?=`, + }, + `(removed text: non supported encoder)`, + }, + { + "gb2312", + args{ + `=?gb2312?B?Sm9obiBEb2U=?=`, + }, + `(removed text: non supported encoder)`, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -141,6 +155,26 @@ func Test_headerParser_parseAddress(t *testing.T) { Address: `john.doe@example.com`, }, }, + { + "utf-7", + args{ + `=?utf-7?B?Sm9obiBEbytJS3ct?= `, + }, + &mail.Address{ + Name: `(removed text: non supported encoder)`, + Address: `john.doe@example.com`, + }, + }, + { + "gb2312", + args{ + `=?gb2312?B?Sm9obiBEb2U=?= `, + }, + &mail.Address{ + Name: `(removed text: non supported encoder)`, + Address: `john.doe@example.com`, + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -232,6 +266,49 @@ func Test_headerParser_parseAddressList(t *testing.T) { }, }, }, + { + "utf-7", + args{ + `=?utf-7?B?Sm9obiBEbytJS3ct?= `, + }, + []*mail.Address{ + { + Name: `(removed text: non supported encoder)`, + Address: `john.doe@example.com`, + }, + }, + }, + { + "gb2312", + args{ + `=?gb2312?B?Sm9obiBEb2U=?= `, + }, + []*mail.Address{ + { + Name: `(removed text: non supported encoder)`, + Address: `john.doe@example.com`, + }, + }, + }, + { + "multiple_charsets with unsupported encoders", + args{ + `test@example.com,=?utf-8?Q?John_D=C3=B8e?= ,=?gb2312?B?Sm9obiBEb2U=?= `, + }, + []*mail.Address{ + { + Address: `test@example.com`, + }, + { + Name: `John Døe`, + Address: `john.doe@example.com`, + }, + { + Name: `(removed text: non supported encoder)`, + Address: `john.doe@example.com`, + }, + }, + }, { "multiple_charsets", args{