diff --git a/internal/log/format.go b/internal/log/format.go new file mode 100644 index 0000000000..4b65003334 --- /dev/null +++ b/internal/log/format.go @@ -0,0 +1,85 @@ +package log + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "net" + "reflect" + "time" + + "github.com/containerd/containerd/log" +) + +const TimeFormat = log.RFC3339NanoFixed + +func FormatTime(t time.Time) string { + return t.Format(TimeFormat) +} + +// DurationFormat formats a [time.Duration] log entry. +// +// A nil value signals an error with the formatting. +type DurationFormat func(time.Duration) interface{} + +func DurationFormatString(d time.Duration) interface{} { return d.String() } +func DurationFormatSeconds(d time.Duration) interface{} { return d.Seconds() } +func DurationFormatMilliseconds(d time.Duration) interface{} { return d.Milliseconds() } + +// FormatIO formats net.Conn and other types that have an `Addr()` or `Name()`. +// +// See FormatEnabled for more information. +func FormatIO(ctx context.Context, v interface{}) string { + m := make(map[string]string) + m["type"] = reflect.TypeOf(v).String() + + switch t := v.(type) { + case net.Conn: + m["localAddress"] = formatAddr(t.LocalAddr()) + m["remoteAddress"] = formatAddr(t.RemoteAddr()) + case interface{ Addr() net.Addr }: + m["address"] = formatAddr(t.Addr()) + default: + return Format(ctx, t) + } + + return Format(ctx, m) +} + +func formatAddr(a net.Addr) string { + return a.Network() + "://" + a.String() +} + +// Format formats an object into a JSON string, without any indendtation or +// HTML escapes. +// Context is used to output a log waring if the conversion fails. +// +// This is intended primarily for `trace.StringAttribute()` +func Format(ctx context.Context, v interface{}) string { + b, err := encode(v) + if err != nil { + G(ctx).WithError(err).Warning("could not format value") + return "" + } + + return string(b) +} + +func encode(v interface{}) ([]byte, error) { + return encodeBuffer(&bytes.Buffer{}, v) +} + +func encodeBuffer(buf *bytes.Buffer, v interface{}) ([]byte, error) { + enc := json.NewEncoder(buf) + enc.SetEscapeHTML(false) + enc.SetIndent("", "") + + if err := enc.Encode(v); err != nil { + err = fmt.Errorf("could not marshall %T to JSON for logging: %w", v, err) + return nil, err + } + + // encoder.Encode appends a newline to the end + return bytes.TrimSpace(buf.Bytes()), nil +} diff --git a/internal/log/hook.go b/internal/log/hook.go index 8f89405923..94c6d0918f 100644 --- a/internal/log/hook.go +++ b/internal/log/hook.go @@ -1,23 +1,58 @@ package log import ( + "bytes" + "reflect" + "time" + "github.com/Microsoft/hcsshim/internal/logfields" + "github.com/containerd/containerd/log" "github.com/sirupsen/logrus" "go.opencensus.io/trace" ) -// Hook serves to intercept and format `logrus.Entry`s before they are passed -// to the ETW hook. +const nullString = "null" + +// Hook intercepts and formats a [logrus.Entry] before it logged. // -// The containerd shim discards the (formatted) logrus output, and outputs only via ETW. -// The Linux GCS outputs logrus entries over stdout, which is consumed by the shim and -// then re-output via the ETW hook. -type Hook struct{} +// The shim either outputs the logs through an ETW hook, discarding the (formatted) output +// or logs output to a pipe for logging binaries to consume. +// The Linux GCS outputs logrus entries over stdout, which is then consumed and re-output +// by the shim. +type Hook struct { + // EncodeAsJSON formats structs, maps, arrays, slices, and [bytes.Buffer] as JSON. + // Variables of [bytes.Buffer] will be converted to []byte. + // + // Default is false. + EncodeAsJSON bool + + // FormatTime specifies the format for [time.Time] variables. + // An empty string disables formatting. + // When disabled, the fall back will the JSON encoding, if enabled. + // + // Default is [github.com/containerd/containerd/log.RFC3339NanoFixed]. + TimeFormat string + + // Duration format converts a [time.Duration] fields to an appropriate encoding. + // nil disables formatting. + // When disabled, the fall back will the JSON encoding, if enabled. + // + // Default is [DurationFormatString], which appends a duration unit after the value. + DurationFormat DurationFormat + + // AddSpanContext adds [logfields.TraceID] and [logfields.SpanID] fields to + // the entry from the span context stored in [logrus.Entry.Context], if it exists. + AddSpanContext bool +} var _ logrus.Hook = &Hook{} func NewHook() *Hook { - return &Hook{} + return &Hook{ + TimeFormat: log.RFC3339NanoFixed, + DurationFormat: DurationFormatString, + AddSpanContext: true, + } } func (h *Hook) Levels() []logrus.Level { @@ -25,14 +60,108 @@ func (h *Hook) Levels() []logrus.Level { } func (h *Hook) Fire(e *logrus.Entry) (err error) { + // JSON encode, if necessary, then add span information + h.encode(e) h.addSpanContext(e) return nil } +// encode loops through all the fields in the [logrus.Entry] and encodes them according to +// the settings in [Hook]. +// If [Hook.TimeFormat] is non-empty, it will be passed to [time.Time.Format] for +// fields of type [time.Time]. +// +// If [Hook.EncodeAsJSON] is true, then fields that are not numeric, boolean, strings, or +// errors will be encoded via a [json.Marshal] (with HTML escaping disabled). +// Chanel- and function-typed fields, as well as unsafe pointers are left alone and not encoded. +// +// If [Hook.TimeFormat] and [Hook.DurationFormat] are empty and [Hook.EncodeAsJSON] is false, +// then this is a no-op. +func (h *Hook) encode(e *logrus.Entry) { + d := e.Data + + formatTime := h.TimeFormat != "" + formatDuration := h.DurationFormat != nil + if !(h.EncodeAsJSON || formatTime || formatDuration) { + return + } + + for k, v := range d { + // encode types with dedicated formatting options first + + if vv, ok := v.(time.Time); formatTime && ok { + d[k] = vv.Format(h.TimeFormat) + continue + } + + if vv, ok := v.(time.Duration); formatDuration && ok { + d[k] = h.DurationFormat(vv) + continue + } + + // general case JSON encoding + + if !h.EncodeAsJSON { + continue + } + + switch vv := v.(type) { + // built in types + // "json" marshals errors as "{}", so leave alone here + case bool, string, error, uintptr, + int8, int16, int32, int64, int, + uint8, uint32, uint64, uint, + float32, float64: + continue + + // Rather than setting d[k] = vv.String(), JSON encode []byte value, since it + // may be a binary payload and not representable as a string. + // `case bytes.Buffer,*bytes.Buffer:` resolves `vv` to `interface{}`, + // so cannot use `vv.Bytes`. + // Could move to below the `reflect.Indirect()` call below, but + // that would require additional typematching and dereferencing. + // Easier to keep these duplicate branches here. + case bytes.Buffer: + v = vv.Bytes() + case *bytes.Buffer: + v = vv.Bytes() + } + + // dereference pointer or interface variables + rv := reflect.Indirect(reflect.ValueOf(v)) + // check if `v` is a null pointer + if !rv.IsValid() { + d[k] = nullString + continue + } + + switch rv.Kind() { + case reflect.Map, reflect.Struct, reflect.Array, reflect.Slice: + default: + // Bool, [U]?Int*, Float*, Complex*, Uintptr, String: encoded as normal + // Chan, Func: not supported by json + // Interface, Pointer: dereferenced above + // UnsafePointer: not supported by json, not safe to de-reference; leave alone + continue + } + + b, err := encode(v) + if err != nil { + // Errors are written to stderr (ie, to `panic.log`) and stops the remaining + // hooks (ie, exporting to ETW) from firing. So add encoding errors to + // the entry data to be written out, but keep on processing. + d[k+"-"+logrus.ErrorKey] = err.Error() + // keep the original `v` as the value, + continue + } + d[k] = string(b) + } +} + func (h *Hook) addSpanContext(e *logrus.Entry) { ctx := e.Context - if ctx == nil { + if !h.AddSpanContext || ctx == nil { return } span := trace.FromContext(ctx) diff --git a/internal/log/scrub.go b/internal/log/scrub.go index d51e0fd89f..d1ef15096e 100644 --- a/internal/log/scrub.go +++ b/internal/log/scrub.go @@ -4,7 +4,6 @@ import ( "bytes" "encoding/json" "errors" - "strings" "sync/atomic" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" @@ -56,11 +55,11 @@ func ScrubProcessParameters(s string) (string, error) { } pp.Environment = map[string]string{_scrubbedReplacement: _scrubbedReplacement} - buf := bytes.NewBuffer(b[:0]) - if err := encode(buf, pp); err != nil { + b, err := encodeBuffer(bytes.NewBuffer(b[:0]), pp) + if err != nil { return "", err } - return strings.TrimSpace(buf.String()), nil + return string(b), nil } // ScrubBridgeCreate scrubs requests sent over the bridge of type @@ -150,21 +149,12 @@ func scrubBytes(b []byte, scrub scrubberFunc) ([]byte, error) { return nil, err } - buf := &bytes.Buffer{} - if err := encode(buf, m); err != nil { + b, err := encode(m) + if err != nil { return nil, err } - return bytes.TrimSpace(buf.Bytes()), nil -} - -func encode(buf *bytes.Buffer, v interface{}) error { - enc := json.NewEncoder(buf) - enc.SetEscapeHTML(false) - if err := enc.Encode(v); err != nil { - return err - } - return nil + return b, nil } func isRequestBase(m genMap) bool { diff --git a/internal/oc/errors.go b/internal/oc/errors.go new file mode 100644 index 0000000000..71df25b8df --- /dev/null +++ b/internal/oc/errors.go @@ -0,0 +1,69 @@ +package oc + +import ( + "errors" + "io" + "net" + "os" + + "github.com/containerd/containerd/errdefs" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +// todo: break import cycle with "internal/hcs/errors.go" and reference errors defined there +// todo: add errors defined in "internal/guest/gcserror" (Hresult does not implement error) + +func toStatusCode(err error) codes.Code { + // checks if err implements GRPCStatus() *"google.golang.org/grpc/status".Status, + // wraps an error defined in "github.com/containerd/containerd/errdefs", or is a + // context timeout or cancelled error + if s, ok := status.FromError(errdefs.ToGRPC(err)); ok { + return s.Code() + } + + switch { + // case isAny(err): + // return codes.Cancelled + case isAny(err, os.ErrInvalid): + return codes.InvalidArgument + case isAny(err, os.ErrDeadlineExceeded): + return codes.DeadlineExceeded + case isAny(err, os.ErrNotExist): + return codes.NotFound + case isAny(err, os.ErrExist): + return codes.AlreadyExists + case isAny(err, os.ErrPermission): + return codes.PermissionDenied + // case isAny(err): + // return codes.ResourceExhausted + case isAny(err, os.ErrClosed, net.ErrClosed, io.ErrClosedPipe, io.ErrShortBuffer): + return codes.FailedPrecondition + // case isAny(err): + // return codes.Aborted + // case isAny(err): + // return codes.OutOfRange + // case isAny(err): + // return codes.Unimplemented + case isAny(err, io.ErrNoProgress): + return codes.Internal + // case isAny(err): + // return codes.Unavailable + case isAny(err, io.ErrShortWrite, io.ErrUnexpectedEOF): + return codes.DataLoss + // case isAny(err): + // return codes.Unauthenticated + default: + return codes.Unknown + } +} + +// isAny returns true if errors.Is is true for any of the provided errors, errs. +func isAny(err error, errs ...error) bool { + for _, e := range errs { + if errors.Is(err, e) { + return true + } + } + return false +} diff --git a/internal/oc/exporter.go b/internal/oc/exporter.go index f428bdaf72..28f8f43a93 100644 --- a/internal/oc/exporter.go +++ b/internal/oc/exporter.go @@ -3,19 +3,26 @@ package oc import ( "github.com/sirupsen/logrus" "go.opencensus.io/trace" + "google.golang.org/grpc/codes" + + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" ) -var _ = (trace.Exporter)(&LogrusExporter{}) +const spanMessage = "Span" + +var _errorCodeKey = logrus.ErrorKey + "Code" // LogrusExporter is an OpenCensus `trace.Exporter` that exports // `trace.SpanData` to logrus output. -type LogrusExporter struct { -} +type LogrusExporter struct{} + +var _ trace.Exporter = &LogrusExporter{} // ExportSpan exports `s` based on the the following rules: // -// 1. All output will contain `s.Attributes`, `s.TraceID`, `s.SpanID`, -// `s.ParentSpanID` for correlation +// 1. All output will contain `s.Attributes`, `s.SpanKind`, `s.TraceID`, +// `s.SpanID`, and `s.ParentSpanID` for correlation // // 2. Any calls to .Annotate will not be supported. // @@ -23,21 +30,57 @@ type LogrusExporter struct { // `s.Status.Code != 0` in which case it will be written at `logrus.ErrorLevel` // providing `s.Status.Message` as the error value. func (le *LogrusExporter) ExportSpan(s *trace.SpanData) { - // Combine all span annotations with traceID, spanID, parentSpanID - baseEntry := logrus.WithFields(logrus.Fields(s.Attributes)) - baseEntry.Data["traceID"] = s.TraceID.String() - baseEntry.Data["spanID"] = s.SpanID.String() - baseEntry.Data["parentSpanID"] = s.ParentSpanID.String() - baseEntry.Data["startTime"] = s.StartTime - baseEntry.Data["endTime"] = s.EndTime - baseEntry.Data["duration"] = s.EndTime.Sub(s.StartTime).String() - baseEntry.Data["name"] = s.Name - baseEntry.Time = s.StartTime + if s.DroppedAnnotationCount > 0 { + logrus.WithFields(logrus.Fields{ + "name": s.Name, + logfields.TraceID: s.TraceID.String(), + logfields.SpanID: s.SpanID.String(), + "dropped": s.DroppedAttributeCount, + "maxAttributes": len(s.Attributes), + }).Warning("span had dropped attributes") + } + + entry := log.L.Dup() + // Combine all span annotations with span data (eg, trace ID, span ID, parent span ID, + // error, status code) + // (OC) Span attributes are guaranteed to be strings, bools, or int64s, so we can + // can skip overhead in entry.WithFields() and add them directly to entry.Data. + // Preallocate ahead of time, since we should add, at most, 10 additional entries + data := make(logrus.Fields, len(entry.Data)+len(s.Attributes)+10) + + // Default log entry may have prexisting/application-wide data + for k, v := range entry.Data { + data[k] = v + } + for k, v := range s.Attributes { + data[k] = v + } + + data[logfields.Name] = s.Name + data[logfields.TraceID] = s.TraceID.String() + data[logfields.SpanID] = s.SpanID.String() + data[logfields.ParentSpanID] = s.ParentSpanID.String() + data[logfields.StartTime] = s.StartTime + data[logfields.EndTime] = s.EndTime + data[logfields.Duration] = s.EndTime.Sub(s.StartTime) + if sk := spanKindToString(s.SpanKind); sk != "" { + data["spanKind"] = sk + } level := logrus.InfoLevel if s.Status.Code != 0 { level = logrus.ErrorLevel - baseEntry.Data[logrus.ErrorKey] = s.Status.Message + + // don't overwrite an existing "error" or "errorCode" attributes + if _, ok := data[logrus.ErrorKey]; !ok { + data[logrus.ErrorKey] = s.Status.Message + } + if _, ok := data[_errorCodeKey]; !ok { + data[_errorCodeKey] = codes.Code(s.Status.Code).String() + } } - baseEntry.Log(level, "Span") + + entry.Data = data + entry.Time = s.StartTime + entry.Log(level, spanMessage) } diff --git a/internal/oc/span.go b/internal/oc/span.go index 0e2b7e9bf6..7260784326 100644 --- a/internal/oc/span.go +++ b/internal/oc/span.go @@ -14,8 +14,7 @@ var DefaultSampler = trace.AlwaysSample() func SetSpanStatus(span *trace.Span, err error) { status := trace.Status{} if err != nil { - // TODO: JTERRY75 - Handle errors in a non-generic way - status.Code = trace.StatusCodeUnknown + status.Code = int32(toStatusCode(err)) status.Message = err.Error() } span.SetStatus(status) @@ -46,3 +45,14 @@ func update(ctx context.Context, s *trace.Span) (context.Context, *trace.Span) { var WithServerSpanKind = trace.WithSpanKind(trace.SpanKindServer) var WithClientSpanKind = trace.WithSpanKind(trace.SpanKindClient) + +func spanKindToString(sk int) string { + switch sk { + case trace.SpanKindClient: + return "client" + case trace.SpanKindServer: + return "server" + default: + return "" + } +}