From 634080399ee0b32dfcceb7a1fbe23134d3cc0aa0 Mon Sep 17 00:00:00 2001 From: Silvio Gissi Date: Fri, 3 Aug 2018 09:10:27 -0700 Subject: [PATCH 1/3] Ignore UTF8 BOM Signed-off-by: Silvio Gissi --- expfmt/text_parse.go | 5 +++++ expfmt/text_parse_test.go | 17 +++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/expfmt/text_parse.go b/expfmt/text_parse.go index ec3d86ba7..037b4e81a 100644 --- a/expfmt/text_parse.go +++ b/expfmt/text_parse.go @@ -125,6 +125,11 @@ func (p *TextParser) reset(in io.Reader) { } else { p.buf.Reset(in) } + // Some clients might throw in UTF-8 BOM chars, ignore if found. + maybeBOM, err := p.buf.Peek(3) + if len(maybeBOM) == 3 && maybeBOM[0] == 0xEF && maybeBOM[1] == 0xBB && maybeBOM[2] == 0xBF && err == nil { + p.buf.Discard(3) + } p.err = nil p.lineCount = 0 if p.summaries == nil || len(p.summaries) > 0 { diff --git a/expfmt/text_parse_test.go b/expfmt/text_parse_test.go index 76c951185..ad716af5d 100644 --- a/expfmt/text_parse_test.go +++ b/expfmt/text_parse_test.go @@ -384,6 +384,23 @@ request_duration_microseconds_count 2693 }, }, }, + // 5: Skip UTF-8 BOM. + { + in: "\xef\xbb\xbfafter_utf8_bom 1\n", + out: []*dto.MetricFamily{ + &dto.MetricFamily{ + Name: proto.String("after_utf8_bom"), + Type: dto.MetricType_UNTYPED.Enum(), + Metric: []*dto.Metric{ + &dto.Metric{ + Untyped: &dto.Untyped{ + Value: proto.Float64(1), + }, + }, + }, + }, + }, + }, } for i, scenario := range scenarios { From 4a59aa9ff2d393685fc41211e2385413ab6b837f Mon Sep 17 00:00:00 2001 From: Silvio Gissi Date: Mon, 6 Aug 2018 15:13:16 -0700 Subject: [PATCH 2/3] Error if Unicode BOM is present instead of skipping. 
--- expfmt/text_parse.go | 28 +++++++++++++++++++++----- expfmt/text_parse_test.go | 42 +++++++++++++++++++++++---------------- 2 files changed, 48 insertions(+), 22 deletions(-) diff --git a/expfmt/text_parse.go b/expfmt/text_parse.go index 037b4e81a..a70b0d378 100644 --- a/expfmt/text_parse.go +++ b/expfmt/text_parse.go @@ -99,6 +99,29 @@ type TextParser struct { // input concurrently, instantiate a separate Parser for each goroutine. func (p *TextParser) TextToMetricFamilies(in io.Reader) (map[string]*dto.MetricFamily, error) { p.reset(in) + + // Some clients might throw in Unicode BOM chars which is explicitly forbidden. + maybeBOM, err := p.buf.Peek(4) + if err == nil && len(maybeBOM) >= 2 { + // UTF-8 BOM. + if len(maybeBOM) >= 3 && maybeBOM[0] == 0xEF && maybeBOM[1] == 0xBB && maybeBOM[2] == 0xBF { + p.parseError("UTF-8 BOM detected but not supported") + } + // UTF-16 BOM. + if (maybeBOM[0] == 0xFE && maybeBOM[1] == 0xFF) || (maybeBOM[0] == 0xFF && maybeBOM[1] == 0xFE) { + p.parseError("UTF-16 BOM detected but not supported") + } + // UTF-32 BOM. + if len(maybeBOM) == 4 && + ((maybeBOM[0] == 0x00 && maybeBOM[1] == 0x00 && maybeBOM[2] == 0xFE && maybeBOM[3] == 0xFF) || + (maybeBOM[0] == 0xFF && maybeBOM[1] == 0xFE && maybeBOM[2] == 0x00 && maybeBOM[3] == 0x00)) { + p.parseError("UTF-32 BOM detected but not supported") + } + if p.err != nil { + return p.metricFamiliesByName, p.err + } + } + for nextState := p.startOfLine; nextState != nil; nextState = nextState() { // Magic happens here... } @@ -125,11 +148,6 @@ func (p *TextParser) reset(in io.Reader) { } else { p.buf.Reset(in) } - // Some clients might throw in UTF-8 BOM chars, ignore if found. 
- maybeBOM, err := p.buf.Peek(3) - if len(maybeBOM) == 3 && maybeBOM[0] == 0xEF && maybeBOM[1] == 0xBB && maybeBOM[2] == 0xBF && err == nil { - p.buf.Discard(3) - } p.err = nil p.lineCount = 0 if p.summaries == nil || len(p.summaries) > 0 { diff --git a/expfmt/text_parse_test.go b/expfmt/text_parse_test.go index ad716af5d..569ceebc9 100644 --- a/expfmt/text_parse_test.go +++ b/expfmt/text_parse_test.go @@ -384,23 +384,6 @@ request_duration_microseconds_count 2693 }, }, }, - // 5: Skip UTF-8 BOM. - { - in: "\xef\xbb\xbfafter_utf8_bom 1\n", - out: []*dto.MetricFamily{ - &dto.MetricFamily{ - Name: proto.String("after_utf8_bom"), - Type: dto.MetricType_UNTYPED.Enum(), - Metric: []*dto.Metric{ - &dto.Metric{ - Untyped: &dto.Untyped{ - Value: proto.Float64(1), - }, - }, - }, - }, - }, - }, } for i, scenario := range scenarios { @@ -581,6 +564,31 @@ metric_bucket{le="bla"} 3.14 in: "metric{l=\"\xbd\"} 3.14\n", err: "text format parsing error in line 1: invalid label value \"\\xbd\"", }, + // 20: UTF-8 BOM present. + { + in: "\xef\xbb\xbfafter_utf8_bom 1\n", + err: "text format parsing error in line 0: UTF-8 BOM detected but not supported", + }, + // 21: UTF-16 BE BOM present. + { + in: "\xfe\xffafter_utf16le_bom 1\n", + err: "text format parsing error in line 0: UTF-16 BOM detected but not supported", + }, + // 22: UTF-16 LE BOM present. + { + in: "\xff\xfeafter_utf16be_bom 1\n", + err: "text format parsing error in line 0: UTF-16 BOM detected but not supported", + }, + // 23: UTF-32 LE BOM present. + { + in: "\xff\xfe\x00\x00after_utf32le_bom 1\n", + err: "text format parsing error in line 0: UTF-32 BOM detected but not supported", + }, + // 24: UTF-32 BE BOM present.
+ { + in: "\x00\x00\xfe\xffafter_utf32be_bom 1\n", + err: "text format parsing error in line 0: UTF-32 BOM detected but not supported", + }, } for i, scenario := range scenarios { From 139de7f0c17c3c118b1991b76e82a77099b7817c Mon Sep 17 00:00:00 2001 From: Silvio Gissi Date: Mon, 6 Aug 2018 15:35:43 -0700 Subject: [PATCH 3/3] Move BOM checks to its own function. Signed-off-by: Silvio Gissi --- expfmt/text_parse.go | 47 ++++++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/expfmt/text_parse.go b/expfmt/text_parse.go index a70b0d378..77f8cee73 100644 --- a/expfmt/text_parse.go +++ b/expfmt/text_parse.go @@ -101,25 +101,9 @@ func (p *TextParser) TextToMetricFamilies(in io.Reader) (map[string]*dto.MetricF p.reset(in) // Some clients might throw in Unicode BOM chars which is explicitly forbidden. - maybeBOM, err := p.buf.Peek(4) - if err == nil && len(maybeBOM) >= 2 { - // UTF-8 BOM. - if len(maybeBOM) >= 3 && maybeBOM[0] == 0xEF && maybeBOM[1] == 0xBB && maybeBOM[2] == 0xBF { - p.parseError("UTF-8 BOM detected but not supported") - } - // UTF-16 BOM. - if (maybeBOM[0] == 0xFE && maybeBOM[1] == 0xFF) || (maybeBOM[0] == 0xFF && maybeBOM[1] == 0xFE) { - p.parseError("UTF-16 BOM detected but not supported") - } - // UTF-32 BOM. 
- if len(maybeBOM) == 4 && - ((maybeBOM[0] == 0x00 && maybeBOM[1] == 0x00 && maybeBOM[2] == 0xFE && maybeBOM[3] == 0xFF) || - (maybeBOM[0] == 0xFF && maybeBOM[1] == 0xFE && maybeBOM[2] == 0x00 && maybeBOM[3] == 0x00)) { - p.parseError("UTF-32 BOM detected but not supported") - } - if p.err != nil { - return p.metricFamiliesByName, p.err - } + if bomType := p.hasBOM(); bomType != "" { + p.parseError(bomType + " BOM detected but not supported") + return p.metricFamiliesByName, p.err } for nextState := p.startOfLine; nextState != nil; nextState = nextState() { @@ -141,6 +125,31 @@ func (p *TextParser) TextToMetricFamilies(in io.Reader) (map[string]*dto.MetricF return p.metricFamiliesByName, p.err } +func (p *TextParser) hasBOM() string { + maybeBOM, err := p.buf.Peek(4) + + // Can't read, no BOM found. + if err != nil { + return "" + } + // UTF-32 BOM. + if len(maybeBOM) == 4 && + ((maybeBOM[0] == 0x00 && maybeBOM[1] == 0x00 && maybeBOM[2] == 0xFE && maybeBOM[3] == 0xFF) || + (maybeBOM[0] == 0xFF && maybeBOM[1] == 0xFE && maybeBOM[2] == 0x00 && maybeBOM[3] == 0x00)) { + return "UTF-32" + } + // UTF-16 BOM. + if len(maybeBOM) >= 2 && ((maybeBOM[0] == 0xFE && maybeBOM[1] == 0xFF) || (maybeBOM[0] == 0xFF && maybeBOM[1] == 0xFE)) { + return "UTF-16" + } + // UTF-8 BOM. + if len(maybeBOM) >= 3 && maybeBOM[0] == 0xEF && maybeBOM[1] == 0xBB && maybeBOM[2] == 0xBF { + return "UTF-8" + } + // No BOM. + return "" +} + func (p *TextParser) reset(in io.Reader) { p.metricFamiliesByName = map[string]*dto.MetricFamily{} if p.buf == nil {