diff --git a/expfmt/text_parse.go b/expfmt/text_parse.go index ec3d86ba7..77f8cee73 100644 --- a/expfmt/text_parse.go +++ b/expfmt/text_parse.go @@ -99,6 +99,13 @@ type TextParser struct { // input concurrently, instantiate a separate Parser for each goroutine. func (p *TextParser) TextToMetricFamilies(in io.Reader) (map[string]*dto.MetricFamily, error) { p.reset(in) + + // Some clients might throw in Unicode BOM chars which is explicitly forbidden. + if bomType := p.hasBOM(); bomType != "" { + p.parseError(bomType + " BOM detected but not supported") + return p.metricFamiliesByName, p.err + } + for nextState := p.startOfLine; nextState != nil; nextState = nextState() { // Magic happens here... } @@ -118,6 +125,31 @@ func (p *TextParser) TextToMetricFamilies(in io.Reader) (map[string]*dto.MetricF return p.metricFamiliesByName, p.err } +func (p *TextParser) hasBOM() string { + maybeBOM, err := p.buf.Peek(4) + + // Can't read, no BOM found. + if err != nil { + return "" + } + // UTF-32 BOM. + if len(maybeBOM) == 4 && + ((maybeBOM[0] == 0x00 && maybeBOM[1] == 0x00 && maybeBOM[2] == 0xFE && maybeBOM[3] == 0xFF) || + (maybeBOM[0] == 0xFF && maybeBOM[1] == 0xFE && maybeBOM[2] == 0x00 && maybeBOM[3] == 0x00)) { + return "UTF-32" + } + // UTF-16 BOM. + if len(maybeBOM) >= 2 && ((maybeBOM[0] == 0xFE && maybeBOM[1] == 0xFF) || (maybeBOM[0] == 0xFF && maybeBOM[1] == 0xFE)) { + return "UTF-16" + } + // UTF-8 BOM. + if len(maybeBOM) >= 3 && maybeBOM[0] == 0xEF && maybeBOM[1] == 0xBB && maybeBOM[2] == 0xBF { + return "UTF-8" + } + // No BOM. + return "" +} + func (p *TextParser) reset(in io.Reader) { p.metricFamiliesByName = map[string]*dto.MetricFamily{} if p.buf == nil { diff --git a/expfmt/text_parse_test.go b/expfmt/text_parse_test.go index 76c951185..569ceebc9 100644 --- a/expfmt/text_parse_test.go +++ b/expfmt/text_parse_test.go @@ -564,6 +564,31 @@ metric_bucket{le="bla"} 3.14 in: "metric{l=\"\xbd\"} 3.14\n", err: "text format parsing error in line 1: invalid label value \"\\xbd\"", }, + // 20: UTF-8 BOM present. + { + in: "\xef\xbb\xbfafter_utf8_bom 1\n", + err: "text format parsing error in line 0: UTF-8 BOM detected but not supported", + }, + // 21: UTF-16 LE BOM present. + { + in: "\xfe\xffafter_utf16le_bom 1\n", + err: "text format parsing error in line 0: UTF-16 BOM detected but not supported", + }, + // 22: UTF-16 BE BOM present. + { + in: "\xff\xfeafter_utf16be_bom 1\n", + err: "text format parsing error in line 0: UTF-16 BOM detected but not supported", + }, + // 23: UTF-32 LE BOM present. + { + in: "\xff\xfe\x00\x00after_utf32le_bom 1\n", + err: "text format parsing error in line 0: UTF-32 BOM detected but not supported", + }, + // 24: UTF-32 BE BOM present. + { + in: "\x00\x00\xfe\xffafter_utf32be_bom 1\n", + err: "text format parsing error in line 0: UTF-32 BOM detected but not supported", + }, } for i, scenario := range scenarios {