From 5a7d0ba70b920a5f1a9803eb94fb6cc4095848b3 Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Sun, 21 Jul 2024 12:45:50 +0200 Subject: [PATCH 1/2] mountinfo: add benchmark for unescape go test -v -test.benchmem -count=10 -run ^$ -bench BenchmarkUnescape . go: downloading golang.org/x/sys v0.1.0 goos: linux goarch: arm64 pkg: github.com/moby/sys/mountinfo BenchmarkUnescape BenchmarkUnescape-10 1000000 1068 ns/op 640 B/op 31 allocs/op BenchmarkUnescape-10 992292 1082 ns/op 640 B/op 31 allocs/op BenchmarkUnescape-10 1000000 1050 ns/op 640 B/op 31 allocs/op BenchmarkUnescape-10 1000000 1038 ns/op 640 B/op 31 allocs/op BenchmarkUnescape-10 1000000 1034 ns/op 640 B/op 31 allocs/op BenchmarkUnescape-10 1000000 1045 ns/op 640 B/op 31 allocs/op BenchmarkUnescape-10 1000000 1071 ns/op 640 B/op 31 allocs/op BenchmarkUnescape-10 1000000 1033 ns/op 640 B/op 31 allocs/op BenchmarkUnescape-10 1000000 1032 ns/op 640 B/op 31 allocs/op BenchmarkUnescape-10 1000000 1070 ns/op 640 B/op 31 allocs/op PASS ok github.com/moby/sys/mountinfo 10.653s Signed-off-by: Sebastiaan van Stijn --- mountinfo/mountinfo_linux_test.go | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/mountinfo/mountinfo_linux_test.go b/mountinfo/mountinfo_linux_test.go index dca55536..e78ee3f9 100644 --- a/mountinfo/mountinfo_linux_test.go +++ b/mountinfo/mountinfo_linux_test.go @@ -771,3 +771,32 @@ func TestUnescape(t *testing.T) { } } } + +func BenchmarkUnescape(b *testing.B) { + testCases := []string{ + "", + "/", + "/some/longer/path", + "/path\\040with\\040spaces", + "/path/with\\134backslash", + "/tab\\011in/path", + `/path/"with'quotes`, + `/path/"with'quotes,\040space,\011tab`, + `\12`, + `\134`, + `"'"'"'`, + `/\1345`, + `/\12x`, + `\0`, + `\x`, + "\\\\", + } + + b.ResetTimer() + b.ReportAllocs() + for i := 0; i < b.N; i++ { + for x := 0; x < len(testCases); x++ { + _, _ = unescape(testCases[x]) + } + } +} From 4446de2de1913b0f7cc6cd12e3766d002c9da03f Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Thu, 25 Jul 2024 19:27:56 -0700 Subject: [PATCH 2/2] mountinfo: simplify/speedup unescape MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It has been pointed out that unescape implementation is unnecessarily complex, and I tend to agree. Plus, we never expect any errors during unescaping, and if there are any, we can always return the string as is. So, drop the error return, and simplify the algorithm. Modify the test case and the benchmark accordingly. NOTE this also fixes the issue of trying to convert escape sequences where the character code is >= utf8.RuneSelf (128 decimal, 0200 octal). Adding those to the output may result in invalid utf-8 strings. The new code doesn't try to unescape those (and the tests are amended to check that). Finally, add some more test cases, and improve the documentation. Here's benchstat output: name old time/op new time/op delta Unescape-20 851ns ± 1% 358ns ± 3% -57.94% (p=0.008 n=5+5) name old alloc/op new alloc/op delta Unescape-20 640B ± 0% 255B ± 0% -60.16% (p=0.000 n=5+4) name old allocs/op new allocs/op delta Unescape-20 31.0 ± 0% 21.0 ± 0% -32.26% (p=0.008 n=5+5) Signed-off-by: Kir Kolyshkin --- mountinfo/mountinfo_linux.go | 91 +++++++++++-------------------- mountinfo/mountinfo_linux_test.go | 58 ++++++++++---------- 2 files changed, 60 insertions(+), 89 deletions(-) diff --git a/mountinfo/mountinfo_linux.go b/mountinfo/mountinfo_linux.go index 43ced2c1..54674b75 100644 --- a/mountinfo/mountinfo_linux.go +++ b/mountinfo/mountinfo_linux.go @@ -24,8 +24,6 @@ func GetMountsFromReader(r io.Reader, filter FilterFunc) ([]*Info, error) { s := bufio.NewScanner(r) out := []*Info{} for s.Scan() { - var err error - /* See http://man7.org/linux/man-pages/man5/proc.5.html @@ -85,29 +83,15 @@ func GetMountsFromReader(r io.Reader, filter FilterFunc) ([]*Info, error) { Parent: toInt(fields[1]), Major: toInt(major), Minor: toInt(minor), + Root: unescape(fields[3]), + Mountpoint: unescape(fields[4]), Options: fields[5], Optional: strings.Join(fields[6:sepIdx], " "), // zero or more optional fields + FSType: unescape(fields[sepIdx+1]), + Source: unescape(fields[sepIdx+2]), VFSOptions: fields[sepIdx+3], } - p.Mountpoint, err = unescape(fields[4]) - if err != nil { - return nil, fmt.Errorf("parsing '%s' failed: mount point: %w", fields[4], err) - } - p.FSType, err = unescape(fields[sepIdx+1]) - if err != nil { - return nil, fmt.Errorf("parsing '%s' failed: fstype: %w", fields[sepIdx+1], err) - } - p.Source, err = unescape(fields[sepIdx+2]) - if err != nil { - return nil, fmt.Errorf("parsing '%s' failed: source: %w", fields[sepIdx+2], err) - } - - p.Root, err = unescape(fields[3]) - if err != nil { - return nil, fmt.Errorf("parsing '%s' failed: root: %w", fields[3], err) - } - // Run the filter after parsing all fields. var skip, stop bool if filter != nil { @@ -188,62 +172,51 @@ func PidMountInfo(pid int) ([]*Info, error) { return GetMountsFromReader(f, nil) } -// A few specific characters in mountinfo path entries (root and mountpoint) -// are escaped using a backslash followed by a character's ascii code in octal. +// Some characters in some mountinfo fields may be escaped using a backslash +// followed by a three octal digits of the character's ASCII code \NNN, where +// N is 0-7, for example: // // space -- as \040 // tab (aka \t) -- as \011 // newline (aka \n) -- as \012 // backslash (aka \\) -- as \134 +// hash (aka #) -- as \043 // -// This function converts path from mountinfo back, i.e. it unescapes the above sequences. -func unescape(path string) (string, error) { - // try to avoid copying +// This function converts all such escape sequences back to ASCII, and returns +// the unescaped string. +func unescape(path string) string { + // Try to avoid copying. if strings.IndexByte(path, '\\') == -1 { - return path, nil + return path } // The following code is UTF-8 transparent as it only looks for some - // specific characters (backslash and 0..7) with values < utf8.RuneSelf, - // and everything else is passed through as is. + // specific characters (backslash and 0..7) with values less than + // utf8.RuneSelf, and everything else is passed through as is. buf := make([]byte, len(path)) bufLen := 0 for i := 0; i < len(path); i++ { - if path[i] != '\\' { - buf[bufLen] = path[i] - bufLen++ - continue - } - s := path[i:] - if len(s) < 4 { - // too short - return "", fmt.Errorf("bad escape sequence %q: too short", s) - } - c := s[1] - switch c { - case '0', '1', '2', '3', '4', '5', '6', '7': - v := c - '0' - for j := 2; j < 4; j++ { // one digit already; two more - if s[j] < '0' || s[j] > '7' { - return "", fmt.Errorf("bad escape sequence %q: not a digit", s[:3]) - } - x := s[j] - '0' - v = (v << 3) | x - } - if v > 255 { - return "", fmt.Errorf("bad escape sequence %q: out of range" + s[:3]) - } - buf[bufLen] = v - bufLen++ + c := path[i] + // Look for \NNN, i.e. a backslash followed by three octal + // digits. Maximum value is 177 (equals utf8.RuneSelf-1). + if c == '\\' && i+3 < len(path) && + (path[i+1] == '0' || path[i+1] == '1') && + (path[i+2] >= '0' && path[i+2] <= '7') && + (path[i+3] >= '0' && path[i+3] <= '7') { + // Convert from ASCII to numeric values. + c1 := path[i+1] - '0' + c2 := path[i+2] - '0' + c3 := path[i+3] - '0' + // Each octal digit is three bits, thus the shift value. + c = c1<<6 | c2<<3 | c3 + // We read three extra bytes of input. i += 3 - continue - default: - return "", fmt.Errorf("bad escape sequence %q: not a digit" + s[:3]) - } + buf[bufLen] = c + bufLen++ } - return string(buf[:bufLen]), nil + return string(buf[:bufLen]) } // toInt converts a string to an int, and ignores any numbers parsing errors, diff --git a/mountinfo/mountinfo_linux_test.go b/mountinfo/mountinfo_linux_test.go index e78ee3f9..1ac797c6 100644 --- a/mountinfo/mountinfo_linux_test.go +++ b/mountinfo/mountinfo_linux_test.go @@ -731,44 +731,42 @@ func TestParseMountinfoExtraCases(t *testing.T) { } func TestUnescape(t *testing.T) { + // When adding test cases below, be aware that Go interprets \NNN + // inside strings enclosed in double quotes in the same way as the + // function being tested, so: + // - for input: either escape every backslash character (i.e. \\), or + // enclose the whole string in `backticks` so \NNN is passed as-is; + // - for output: write it like "\040", which is identical to " ". testCases := []struct { input, output string - isErr bool }{ - {"", "", false}, - {"/", "/", false}, - {"/some/longer/path", "/some/longer/path", false}, - {"/path\\040with\\040spaces", "/path with spaces", false}, - {"/path/with\\134backslash", "/path/with\\backslash", false}, - {"/tab\\011in/path", "/tab\tin/path", false}, - {`/path/"with'quotes`, `/path/"with'quotes`, false}, - {`/path/"with'quotes,\040space,\011tab`, `/path/"with'quotes, space, tab`, false}, - {`\12`, "", true}, - {`\134`, `\`, false}, - {`"'"'"'`, `"'"'"'`, false}, - {`/\1345`, `/\5`, false}, - {`/\12x`, "", true}, - {`\0`, "", true}, - {`\x`, "", true}, - {"\\\\", "", true}, + {"", ""}, + {"/", "/"}, + {"/some/longer/path", "/some/longer/path"}, + {`/path\040with\040spaces`, "/path\040with\040spaces"}, + {"/path/with\\134backslash", "/path/with\\backslash"}, + {"/tab\\011in/path", "/tab\011in/path"}, + {`/path/"with'quotes`, `/path/"with'quotes`}, + {`/path/"with'quotes,\040space,\011tab`, `/path/"with'quotes, space, tab`}, + {`\12`, `\12`}, // Not enough digits. + {`\134`, `\`}, // Backslash. + {`"'"'"'`, `"'"'"'`}, + {`/\1345`, `/\5`}, // Backslash with extra digit. + {`/\12x`, `/\12x`}, + {`\0`, `\0`}, // Not enough digits. + {`\000\000`, "\000\000"}, // NUL (min allowed ASCII value). + {`\x`, `\x`}, + {"\\\\", "\\\\"}, + {`\177`, "\177"}, // Max allowed ASCII value. + {`\222`, `\222`}, // Too large value -- not unescaped. + {`Это\040комон\040какой-то`, "Это комон какой-то"}, // Some UTF-8 -- not unescaped. } for _, tc := range testCases { - res, err := unescape(tc.input) - if tc.isErr == true { - if err == nil { - t.Errorf("Input %q, want error, got nil", tc.input) - } - // no more checks - continue - } + res := unescape(tc.input) if res != tc.output { t.Errorf("Input %q, want %q, got %q", tc.input, tc.output, res) } - if err != nil { - t.Errorf("Input %q, want nil, got error %v", tc.input, err) - continue - } } } @@ -796,7 +794,7 @@ func BenchmarkUnescape(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { for x := 0; x < len(testCases); x++ { - _, _ = unescape(testCases[x]) + _ = unescape(testCases[x]) } } }