Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 49 additions & 13 deletions cmd/openshift-tests/openshift-tests.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,29 +13,30 @@ import (
"syscall"
"time"

"github.com/openshift/origin/pkg/monitor"
"github.com/openshift/origin/pkg/monitor/monitor_cmd"
"github.com/openshift/origin/pkg/monitor/resourcewatch/cmd"
"github.com/openshift/origin/pkg/riskanalysis"
testginkgo "github.com/openshift/origin/pkg/test/ginkgo"
"github.com/openshift/origin/pkg/version"
exutil "github.com/openshift/origin/test/extended/util"
"github.com/openshift/origin/test/extended/util/cluster"
"github.com/openshift/origin/test/extended/util/disruption/controlplane"
"github.com/openshift/origin/test/extended/util/disruption/externalservice"
"github.com/openshift/origin/test/extended/util/disruption/frontends"

"k8s.io/cli-runtime/pkg/genericclioptions"

"github.com/onsi/ginkgo"
"github.com/openshift/library-go/pkg/image/reference"
"github.com/openshift/library-go/pkg/serviceability"
"github.com/spf13/cobra"
"github.com/spf13/pflag"

"k8s.io/cli-runtime/pkg/genericclioptions"
utilflag "k8s.io/component-base/cli/flag"
"k8s.io/component-base/logs"
"k8s.io/klog/v2"
"k8s.io/kubectl/pkg/util/templates"

"github.com/openshift/origin/pkg/monitor"
"github.com/openshift/origin/pkg/monitor/resourcewatch/cmd"
testginkgo "github.com/openshift/origin/pkg/test/ginkgo"
"github.com/openshift/origin/pkg/version"
exutil "github.com/openshift/origin/test/extended/util"
"github.com/openshift/origin/test/extended/util/cluster"
"github.com/openshift/origin/test/extended/util/disruption/controlplane"
"github.com/openshift/origin/test/extended/util/disruption/frontends"
"github.com/onsi/ginkgo"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
)

func main() {
Expand Down Expand Up @@ -76,6 +77,7 @@ func main() {
newImagesCommand(),
newRunTestCommand(),
newRunMonitorCommand(),
newTestFailureRiskAnalysisCommand(),
cmd.NewRunResourceWatchCommand(),
monitor_cmd.NewTimelineCommand(genericclioptions.IOStreams{
In: os.Stdin,
Expand Down Expand Up @@ -130,6 +132,40 @@ func newRunMonitorCommand() *cobra.Command {
return cmd
}

// sippyDefaultURL is the default value of the risk-analysis command's --sippy-url flag:
// the sippy API endpoint that the merged test failure summary is submitted to.
const sippyDefaultURL = "https://sippy.dptools.openshift.org/api/jobs/runs/risk_analysis"

// newTestFailureRiskAnalysisCommand builds the "risk-analysis" subcommand.
//
// The command is a thin wrapper around riskanalysis.Options: it wires stdout and
// stderr into the options, binds the --junit-dir (required) and --sippy-url
// (defaults to sippyDefaultURL) flags to them, and delegates execution to
// Options.Run.
func newTestFailureRiskAnalysisCommand() *cobra.Command {
	opts := &riskanalysis.Options{
		Out:    os.Stdout,
		ErrOut: os.Stderr,
	}

	// Named "command" rather than "cmd" so the imported cmd package is not shadowed.
	command := &cobra.Command{
		Use:   "risk-analysis",
		Short: "Performs risk analysis on test failures",
		Long: templates.LongDesc(`
Uses the test failure summary json files written along-side our junit xml
files after an invocation of openshift-tests. If multiple files are present
(multiple invocations of openshift-tests) we will merge them into one.
Results are then submitted to sippy which will return an analysis of per-test
and overall risk level given historical pass rates on the failed tests.
The resulting analysis is then also written to the junit artifacts directory.
`),

		RunE: func(_ *cobra.Command, _ []string) error {
			return opts.Run()
		},
	}

	flags := command.Flags()
	flags.StringVar(&opts.JUnitDir, "junit-dir", opts.JUnitDir,
		"The directory where test reports were written, and analysis file will be stored.")
	command.MarkFlagRequired("junit-dir")
	flags.StringVar(&opts.SippyURL, "sippy-url", sippyDefaultURL,
		"Sippy URL API endpoint")

	return command
}

type imagesOptions struct {
Repository string
Upstream bool
Expand Down
95 changes: 95 additions & 0 deletions pkg/riskanalysis/cmd.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
package riskanalysis

import (
"bytes"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"net/http"
"os"
"path/filepath"

"github.com/pkg/errors"
)

// Options carries the configuration for a risk analysis run, which determines
// how severe or unusual the test failures in an openshift-tests run were.
type Options struct {
	// Out receives progress messages.
	Out io.Writer
	// ErrOut is the error stream supplied by the caller.
	ErrOut io.Writer
	// JUnitDir is the directory that is scanned for test failure summary
	// files and that receives the resulting analysis artifact.
	JUnitDir string
	// SippyURL is the sippy API endpoint the summary is submitted to.
	SippyURL string
}

// testFailureSummaryFilePrefix is the file name prefix shared by the code that writes the
// test failure summary json files and by the glob Run uses to find them again.
const testFailureSummaryFilePrefix = "test-failures-summary"

// Run performs the test risk analysis by reading the test failure summary files
// from JUnitDir, submitting them to sippy, and writing out the analysis result
// as a new artifact (risk-analysis.json) in the same directory.
//
// When several summary files are present (openshift-tests is often invoked more
// than once per job run) their tests are merged into a single job run before
// submission; all files must carry the same job name.
//
// An error is returned when no summary file is found, a file cannot be read or
// parsed, job names differ between files, the request cannot be built or sent,
// sippy answers with a non-2xx status, or the artifact cannot be written.
func (opt *Options) Run() error {
	fmt.Fprintf(opt.Out, "Scanning for %s files in: %s\n", testFailureSummaryFilePrefix, opt.JUnitDir)

	// filepath.Join keeps the pattern relative when JUnitDir is empty instead of
	// silently globbing the filesystem root.
	pattern := filepath.Join(opt.JUnitDir, testFailureSummaryFilePrefix+"*.json")
	resultFiles, err := filepath.Glob(pattern)
	if err != nil {
		return errors.Wrapf(err, "error scanning for %s files", testFailureSummaryFilePrefix)
	}
	fmt.Fprintf(opt.Out, "Found files: %v\n", resultFiles)

	// Without any input there is nothing to analyse; submitting a JSON "null"
	// to sippy would only produce a misleading artifact.
	if len(resultFiles) == 0 {
		return fmt.Errorf("no %s files found in %s", testFailureSummaryFilePrefix, opt.JUnitDir)
	}

	// Read each result file into a ProwJobRun struct:
	prowJobRuns := make([]*ProwJobRun, 0, len(resultFiles))
	for _, rf := range resultFiles {
		data, err := os.ReadFile(rf)
		if err != nil {
			return errors.Wrapf(err, "error reading %s", rf)
		}
		jobRun := &ProwJobRun{}
		if err := json.Unmarshal(data, jobRun); err != nil {
			return errors.Wrapf(err, "error unmarshalling ProwJob json from %s", rf)
		}
		prowJobRuns = append(prowJobRuns, jobRun)
	}

	// We will often have more than one output file for this job run because openshift-tests is often
	// invoked multiple times (pre/post upgrade). We need to merge the data together in this case.
	// The first run is the merge target; the tests of every later run are appended to it.
	finalProwJobRun := prowJobRuns[0]
	for _, pjr := range prowJobRuns[1:] {
		if pjr.ProwJob.Name != finalProwJobRun.ProwJob.Name {
			return fmt.Errorf("mismatched job names found in %s files, %s != %s",
				testFailureSummaryFilePrefix, finalProwJobRun.ProwJob.Name, pjr.ProwJob.Name)
		}
		finalProwJobRun.Tests = append(finalProwJobRun.Tests, pjr.Tests...)
	}

	inputBytes, err := json.Marshal(finalProwJobRun)
	if err != nil {
		return errors.Wrap(err, "error marshalling results")
	}

	// The merged job run is sent as the JSON body of a GET request.
	req, err := http.NewRequest(http.MethodGet, opt.SippyURL, bytes.NewReader(inputBytes))
	if err != nil {
		// e.g. an unparsable --sippy-url; req is nil here and must not be used.
		return errors.Wrap(err, "error creating risk analysis request")
	}
	req.Header.Set("Content-Type", "application/json")

	// NOTE(review): no client timeout is configured, so a hung server blocks
	// this command until the connection is torn down.
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return errors.Wrap(err, "error requesting risk analysis from sippy")
	}
	defer resp.Body.Close()

	riskAnalysisBytes, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return errors.Wrap(err, "error reading risk analysis response body from sippy")
	}
	fmt.Fprintln(opt.Out, "response Body:", string(riskAnalysisBytes))

	// Do not persist an error page as if it were a valid analysis.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return fmt.Errorf("risk analysis request to sippy failed with status %s", resp.Status)
	}

	outputFile := filepath.Join(opt.JUnitDir, "risk-analysis.json")
	if err := ioutil.WriteFile(outputFile, riskAnalysisBytes, 0644); err != nil {
		return errors.Wrap(err, "error writing risk analysis json artifact")
	}
	fmt.Fprintf(opt.Out, "Successfully wrote: %s\n", outputFile)

	return nil
}
30 changes: 30 additions & 0 deletions pkg/riskanalysis/types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package riskanalysis

// The types in this file are subsets of the sippy API types of the same name, copied here to leave out a
// lot of the cruft.
//
// ProwJobRunTest links a test to the job run it executed in, along with the status of that execution.
//
// NOTE: We're getting dangerously close to being able to live push results after a job run.

// ProwJobRun is the payload written to the test failure summary file and submitted to sippy:
// one run of a prow job together with the tests being reported for it.
// The fields carry no json tags, so the field names are the JSON keys.
type ProwJobRun struct {
// ID is the job run identifier; the summary writer fills it from the BUILD_ID
// environment variable and leaves it 0 when that cannot be parsed.
ID int
// ProwJob identifies the job this run belongs to.
ProwJob ProwJob
// Tests lists the tests reported for this run.
Tests []ProwJobRunTest
}

// ProwJob identifies a prow job by name.
type ProwJob struct {
// Name is the job name; the summary writer fills it from the JOB_NAME environment variable.
Name string
}

// Test identifies a single test by name.
type Test struct {
// Name is the test name as it appears in the junit results.
Name string
}

// Suite identifies the test suite a test was run in.
type Suite struct {
// Name is the junit suite name.
Name string
}

// ProwJobRunTest records the outcome of one test within a job run.
type ProwJobRunTest struct {
// Test is the test that was executed.
Test Test
// Suite is the suite the test was executed in.
Suite Suite
// Status is the status code sippy uses internally for the outcome (see getSippyStatusCode).
// The plain int mirrors the sippy definition this type was copied from, where a smaller
// integer type was avoided because gorm auto-migrate kept trying to change the column type.
Status int // would like to use smallint here, but gorm auto-migrate breaks trying to change the type every start
}
88 changes: 88 additions & 0 deletions pkg/riskanalysis/write_test_failure_summary.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package riskanalysis

import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"

"github.com/openshift/origin/pkg/test/ginkgo/junitapi"
)

// WriteJobRunTestFailureSummary writes a more minimal json file summarizing a little info about the
// job run, and what tests failed. (successful, skipped and flaked tests are omitted)
// This is intended to be later submitted to sippy for a risk analysis of how unusual the
// test failures were, but that final step is handled elsewhere.
//
// The file is written to artifactDir and named testFailureSummaryFilePrefix + timeSuffix + ".json".
// The job run ID and job name are taken from the BUILD_ID and JOB_NAME environment variables.
// Failed tests are listed in the order they first appear in finalSuiteResults, so the same
// suite results always produce the same file.
//
// It returns an error if the summary cannot be marshalled or the file cannot be written.
func WriteJobRunTestFailureSummary(artifactDir, timeSuffix string, finalSuiteResults *junitapi.JUnitTestSuite) error {
	// A test name can appear more than once, so fold every occurrence into one
	// passFail per name. names remembers first-seen order, because ranging over
	// the map would make the order of the output change from run to run.
	outcomes := make(map[string]*passFail, len(finalSuiteResults.TestCases))
	names := make([]string, 0, len(finalSuiteResults.TestCases))

	for _, testCase := range finalSuiteResults.TestCases {
		outcome, seen := outcomes[testCase.Name]
		if !seen {
			outcome = &passFail{}
			outcomes[testCase.Name] = outcome
			names = append(names, testCase.Name)
		}

		switch {
		case testCase.SkipMessage != nil:
			// A skipped execution is neither a pass nor a failure.
		case testCase.FailureOutput != nil:
			outcome.Failed = true
		default:
			outcome.Passed = true
		}
	}

	// If we can't parse this, we submit without it, it is not required.
	jobRunID, _ := strconv.Atoi(os.Getenv("BUILD_ID"))

	jr := ProwJobRun{
		ID:      jobRunID,
		ProwJob: ProwJob{Name: os.Getenv("JOB_NAME")},
		// Non-nil on purpose: an empty list must encode as [] rather than null.
		Tests: []ProwJobRunTest{},
	}

	for _, name := range names {
		outcome := outcomes[name]
		if !outcome.Failed {
			// if no failures, it is neither a fail nor a flake:
			continue
		}
		if outcome.Passed {
			// skip flakes for now, we're not ready to process them yet:
			continue
		}
		jr.Tests = append(jr.Tests, ProwJobRunTest{
			Test:   Test{Name: name},
			Suite:  Suite{Name: finalSuiteResults.Name},
			Status: getSippyStatusCode(outcome),
		})
	}

	jsonContent, err := json.MarshalIndent(jr, "", " ")
	if err != nil {
		return err
	}
	outputFile := filepath.Join(artifactDir, fmt.Sprintf("%s%s.json",
		testFailureSummaryFilePrefix, timeSuffix))
	return ioutil.WriteFile(outputFile, jsonContent, 0644)
}

// passFail accumulates the outcomes seen for a single test name, which can
// appear more than once in a suite's results. A test with both flags set
// was a flake.
type passFail struct {
	Passed, Failed bool
}

// getSippyStatusCode maps the recorded outcomes of a test to the status code
// sippy uses internally: 13 for a flake (failed and passed), 12 for a plain
// failure, and 0 when the test never failed.
func getSippyStatusCode(pf *passFail) int {
	const (
		statusNone    = 0
		statusFailure = 12
		statusFlake   = 13
	)

	if !pf.Failed {
		// we should not hit this given the filtering done by the caller
		return statusNone
	}
	if pf.Passed {
		return statusFlake
	}
	return statusFailure
}
15 changes: 12 additions & 3 deletions pkg/test/ginkgo/cmd_runsuite.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (

"github.com/onsi/ginkgo/config"
"github.com/openshift/origin/pkg/monitor"
"github.com/openshift/origin/pkg/riskanalysis"
"github.com/openshift/origin/pkg/test/ginkgo/junitapi"
"k8s.io/apimachinery/pkg/util/sets"
)
Expand Down Expand Up @@ -444,11 +445,14 @@ func (opt *Options) Run(suite *TestSuite, junitSuiteName string) error {
var syntheticTestResults []*junitapi.JUnitTestCase
var syntheticFailure bool

timeSuffix := fmt.Sprintf("_%s", opt.MonitorEventsOptions.GetStartTime().
UTC().Format("20060102-150405"))

if err := opt.MonitorEventsOptions.End(ctx, restConfig, opt.JUnitDir); err != nil {
return err
}
if len(opt.JUnitDir) > 0 {
if err := opt.MonitorEventsOptions.WriteRunDataToArtifactsDir(opt.JUnitDir); err != nil {
if err := opt.MonitorEventsOptions.WriteRunDataToArtifactsDir(opt.JUnitDir, timeSuffix); err != nil {
fmt.Fprintf(opt.ErrOut, "error: Failed to write run-data: %v\n", err)
}
}
Expand Down Expand Up @@ -497,8 +501,13 @@ func (opt *Options) Run(suite *TestSuite, junitSuiteName string) error {
}

if len(opt.JUnitDir) > 0 {
if err := writeJUnitReport("junit_e2e", junitSuiteName, tests, opt.JUnitDir, duration, opt.ErrOut, syntheticTestResults...); err != nil {
fmt.Fprintf(opt.Out, "error: Unable to write e2e JUnit results: %v", err)
finalSuiteResults := generateJUnitTestSuiteResults(junitSuiteName, duration, tests, syntheticTestResults...)
if err := writeJUnitReport(finalSuiteResults, "junit_e2e", timeSuffix, opt.JUnitDir, opt.ErrOut); err != nil {
fmt.Fprintf(opt.Out, "error: Unable to write e2e JUnit xml results: %v", err)
}

if err := riskanalysis.WriteJobRunTestFailureSummary(opt.JUnitDir, timeSuffix, finalSuiteResults); err != nil {
fmt.Fprintf(opt.Out, "error: Unable to write e2e job run failures summary: %v", err)
}
}

Expand Down
4 changes: 3 additions & 1 deletion pkg/test/ginkgo/cmd_runtest.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,9 @@ func (opt *TestOptions) Run(args []string) error {
if err := opt.MonitorEventsOptions.End(ctx, restConfig, ""); err != nil {
return err
}
if err := opt.MonitorEventsOptions.WriteRunDataToArtifactsDir(""); err != nil {
timeSuffix := fmt.Sprintf("_%s", opt.MonitorEventsOptions.GetStartTime().
UTC().Format("20060102-150405"))
if err := opt.MonitorEventsOptions.WriteRunDataToArtifactsDir("", timeSuffix); err != nil {
fmt.Fprintf(opt.ErrOut, "error: Failed to write run-data: %v\n", err)
}
}
Expand Down
15 changes: 12 additions & 3 deletions pkg/test/ginkgo/junit.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,12 @@ import (
"github.com/openshift/origin/pkg/version"
)

func writeJUnitReport(filePrefix, name string, tests []*testCase, dir string, duration time.Duration, errOut io.Writer, additionalResults ...*junitapi.JUnitTestCase) error {
func generateJUnitTestSuiteResults(
name string,
duration time.Duration,
tests []*testCase,
syntheticTestResults ...*junitapi.JUnitTestCase) *junitapi.JUnitTestSuite {

s := &junitapi.JUnitTestSuite{
Name: name,
Duration: duration.Seconds(),
Expand Down Expand Up @@ -69,7 +74,7 @@ func writeJUnitReport(filePrefix, name string, tests []*testCase, dir string, du
})
}
}
for _, result := range additionalResults {
for _, result := range syntheticTestResults {
switch {
case result.SkipMessage != nil:
s.NumSkipped++
Expand All @@ -79,11 +84,15 @@ func writeJUnitReport(filePrefix, name string, tests []*testCase, dir string, du
s.NumTests++
s.TestCases = append(s.TestCases, result)
}
return s
}

// writeJUnitReport marshals the suite results to XML and writes them to
// {dir}/{filePrefix}{fileSuffix}.xml, announcing the destination on errOut.
//
// fileSuffix is appended verbatim: callers pass a suffix that already starts
// with its own "_" separator (for example "_20060102-150405"), the same
// convention the test failure summary writer uses, so no extra separator is
// inserted here.
//
// It returns an error if the suite cannot be marshalled or the file cannot be
// written.
func writeJUnitReport(s *junitapi.JUnitTestSuite, filePrefix, fileSuffix, dir string, errOut io.Writer) error {
	out, err := xml.Marshal(s)
	if err != nil {
		return err
	}
	path := filepath.Join(dir, fmt.Sprintf("%s%s.xml", filePrefix, fileSuffix))
	fmt.Fprintf(errOut, "Writing JUnit report to %s\n\n", path)
	return ioutil.WriteFile(path, out, 0640)
}
Expand Down
Loading