Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions expfmt/text_parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,13 @@ type TextParser struct {
// input concurrently, instantiate a separate Parser for each goroutine.
func (p *TextParser) TextToMetricFamilies(in io.Reader) (map[string]*dto.MetricFamily, error) {
p.reset(in)

// Some clients might prepend a Unicode BOM, which is explicitly forbidden.
if bomType := p.hasBOM(); bomType != "" {
p.parseError(bomType + " BOM detected but not supported")
return p.metricFamiliesByName, p.err
}

for nextState := p.startOfLine; nextState != nil; nextState = nextState() {
// Magic happens here...
}
Expand All @@ -118,6 +125,31 @@ func (p *TextParser) TextToMetricFamilies(in io.Reader) (map[string]*dto.MetricF
return p.metricFamiliesByName, p.err
}

// hasBOM reports which Unicode byte order mark, if any, starts the buffered
// input. It returns "UTF-32", "UTF-16" or "UTF-8" when the corresponding BOM
// is found and "" otherwise. The input is only peeked at.
func (p *TextParser) hasBOM() string {
	// The error is deliberately ignored: when fewer than four bytes are
	// available, Peek reports an error but still hands back the bytes it
	// has. Bailing out on that error would let a short input that consists
	// of little more than a 2- or 3-byte BOM slip through undetected.
	// NOTE(review): assumes p.buf is a *bufio.Reader — confirm.
	maybeBOM, _ := p.buf.Peek(4)
	head := string(maybeBOM)

	switch {
	// UTF-32 BOM (BE: 00 00 FE FF, LE: FF FE 00 00). This has to be tested
	// before UTF-16 because the UTF-32 LE BOM starts with the UTF-16 LE BOM.
	case len(head) == 4 && (head == "\x00\x00\xfe\xff" || head == "\xff\xfe\x00\x00"):
		return "UTF-32"
	// UTF-16 BOM (BE: FE FF, LE: FF FE).
	case len(head) >= 2 && (head[:2] == "\xfe\xff" || head[:2] == "\xff\xfe"):
		return "UTF-16"
	// UTF-8 BOM (EF BB BF).
	case len(head) >= 3 && head[:3] == "\xef\xbb\xbf":
		return "UTF-8"
	}
	// No BOM.
	return ""
}

func (p *TextParser) reset(in io.Reader) {
p.metricFamiliesByName = map[string]*dto.MetricFamily{}
if p.buf == nil {
Expand Down
25 changes: 25 additions & 0 deletions expfmt/text_parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,31 @@ metric_bucket{le="bla"} 3.14
in: "metric{l=\"\xbd\"} 3.14\n",
err: "text format parsing error in line 1: invalid label value \"\\xbd\"",
},
// 20: UTF-8 BOM present.
{
in: "\xef\xbb\xbfafter_utf8_bom 1\n",
err: "text format parsing error in line 0: UTF-8 BOM detected but not supported",
},
// 21: UTF-16 BE BOM (FE FF) present.
{
in: "\xfe\xffafter_utf16le_bom 1\n",
err: "text format parsing error in line 0: UTF-16 BOM detected but not supported",
},
// 22: UTF-16 LE BOM (FF FE) present.
{
in: "\xff\xfeafter_utf16be_bom 1\n",
err: "text format parsing error in line 0: UTF-16 BOM detected but not supported",
},
// 23: UTF-32 LE BOM present.
{
in: "\xff\xfe\x00\x00after_utf32le_bom 1\n",
err: "text format parsing error in line 0: UTF-32 BOM detected but not supported",
},
// 24: UTF-32 BE BOM present.
{
in: "\x00\x00\xfe\xffafter_utf32be_bom 1\n",
err: "text format parsing error in line 0: UTF-32 BOM detected but not supported",
},
}

for i, scenario := range scenarios {
Expand Down