From f8181e60f2cde9cbb29cb4de13fd9f7260500a0d Mon Sep 17 00:00:00 2001 From: Graham Whaley Date: Thu, 28 Nov 2019 13:19:19 +0000 Subject: [PATCH 1/3] metrics: collected: No points on high density graphs Adding points (geom_point()) to the line graphs for high node density cases just adds 'fuzz' to the graphs, in fact, for very high pod count cases (2k for instance), the points all merge together, and you effectively end up with lines that are the width of the point, which then merge into each other. Similar to the parallel.R code, only add points to the graphs if we have a few samples (<=20 by default), where they can add visibility benefits. Signed-off-by: Graham Whaley --- .../report_dockerfile/collectd_scaling.R | 172 +++++++++++------- 1 file changed, 107 insertions(+), 65 deletions(-) diff --git a/metrics/report/report_dockerfile/collectd_scaling.R b/metrics/report/report_dockerfile/collectd_scaling.R index b6c963f1..f4143c65 100755 --- a/metrics/report/report_dockerfile/collectd_scaling.R +++ b/metrics/report/report_dockerfile/collectd_scaling.R @@ -30,6 +30,9 @@ cpustats=c() # Statistics for cpu usage bootstats=c() # Statistics for boot (launch) times inodestats=c() # Statistics for inode usage +skip_points=0 # If we have a lot of samples, do not add data points to the graphs +skip_points_limit=100 # The limit above which we do not draw points + # iterate over every set of results (test run) for (currentdir in resultdirs) { # For every results file we are interested in evaluating @@ -341,6 +344,10 @@ for (currentdir in resultdirs) { num_pods = local_bootdata$n_pods[length(local_bootdata$n_pods)] + if (num_pods > skip_points_limit) { + skip_points=1 + } + # We get data in b, but want the graphs in Gb. 
memtotal = memtotal / (1024*1024*1024) gb_per_pod = memtotal/num_pods @@ -420,16 +427,9 @@ mem_line_plot <- ggplot() + aes(s_offset, mem_free_gb, colour=interaction(testname, node), group=interaction(testname, node)), alpha=0.3) + - geom_point(data=memfreedata, - aes(s_offset, mem_free_gb, colour=interaction(testname, node), - group=interaction(testname, node)), - alpha=0.5, size=0.5) + geom_line(data=podbootdata, aes(x=s_offset, y=n_pods*mem_scale, colour=interaction(testname,"pod count"), group=testname), alpha=0.2) + - geom_point(data=podbootdata, - aes(x=s_offset, y=n_pods*mem_scale, colour=interaction(testname,"pod count"), group=testname), - alpha=0.3, size=0.5) + labs(colour="") + xlab("seconds") + ylab("System Avail (Gb)") + @@ -437,6 +437,18 @@ mem_line_plot <- ggplot() + ggtitle("System Memory free") + theme(axis.text.x=element_text(angle=90)) +if (skip_points == 0 ) { + mem_line_plot = mem_line_plot + + geom_point(data=memfreedata, + aes(s_offset, mem_free_gb, colour=interaction(testname, node), + group=interaction(testname, node)), + alpha=0.5, size=0.5) + + geom_point(data=podbootdata, + aes(x=s_offset, y=n_pods*mem_scale, colour=interaction(testname,"pod count"), + group=testname), + alpha=0.3, size=0.5) +} + page1 = grid.arrange( mem_line_plot, mem_stats_plot, @@ -458,16 +470,9 @@ cpu_line_plot <- ggplot() + aes(x=s_offset, y=value, colour=interaction(testname, node), group=interaction(testname, node)), alpha=0.3) + - geom_point(data=cpuidledata, - aes(x=s_offset, y=value, colour=interaction(testname, node), - group=interaction(testname, node)), - alpha=0.5, size=0.5) + geom_line(data=podbootdata, aes(x=s_offset, y=n_pods*cpu_scale, colour=interaction(testname,"pod count"), group=testname), alpha=0.2) + - geom_point(data=podbootdata, - aes(x=s_offset, y=n_pods*cpu_scale, colour=interaction(testname,"pod count"), group=testname), - alpha=0.3, size=0.5) + labs(colour="") + scale_y_continuous(labels=comma, sec.axis=sec_axis(~ ./cpu_scale, name="pods")) 
+ xlab("seconds") + @@ -475,6 +480,18 @@ cpu_line_plot <- ggplot() + ggtitle("System CPU usage") + theme(axis.text.x=element_text(angle=90)) +if (skip_points == 0 ) { + cpu_line_plot = cpu_line_plot + + geom_point(data=cpuidledata, + aes(x=s_offset, y=value, colour=interaction(testname, node), + group=interaction(testname, node)), + alpha=0.5, size=0.5) + + geom_point(data=podbootdata, + aes(x=s_offset, y=n_pods*cpu_scale, colour=interaction(testname,"pod count"), + group=testname), + alpha=0.3, size=0.5) +} + page2 = grid.arrange( cpu_line_plot, cpu_stats_plot, @@ -520,16 +537,9 @@ inode_line_plot <- ggplot() + aes(x=s_offset, y=value, colour=interaction(testname, node), group=interaction(testname, node)), alpha=0.2) + - geom_point(data=inodefreedata, - aes(x=s_offset, y=value, colour=interaction(testname, node), - group=interaction(testname, node)), - alpha=0.5, size=0.5) + geom_line(data=podbootdata, aes(x=s_offset, y=n_pods*inode_scale, colour=interaction(testname,"pod count"), group=testname), alpha=0.2) + - geom_point(data=podbootdata, - aes(x=s_offset, y=n_pods*inode_scale, colour=interaction(testname,"pod count"), group=testname), - alpha=0.3, size=0.5) + labs(colour="") + xlab("seconds") + ylab("inodes free") + @@ -537,6 +547,18 @@ inode_line_plot <- ggplot() + ggtitle("inodes free") + theme(axis.text.x=element_text(angle=90)) +if (skip_points == 0 ) { + inode_line_plot = inode_line_plot + + geom_point(data=inodefreedata, + aes(x=s_offset, y=value, colour=interaction(testname, node), + group=interaction(testname, node)), + alpha=0.5, size=0.5) + + geom_point(data=podbootdata, + aes(x=s_offset, y=n_pods*inode_scale, colour=interaction(testname,"pod count"), + group=testname), + alpha=0.3, size=0.5) +} + page4 = grid.arrange( inode_line_plot, inode_stats_plot, @@ -554,24 +576,13 @@ interface_packet_line_plot <- ggplot() + aes(x=s_offset, y=tx, colour=interaction(testname, node, name, "tx"), group=interaction(testname, node, name, "tx")), alpha=0.2, 
na.rm=TRUE) + - geom_point(data=ifpacketdata, - aes(x=s_offset, y=tx, colour=interaction(testname, node, name, "tx"), - group=interaction(testname, node, name, "tx")), - alpha=0.5, size=0.5, na.rm=TRUE) + geom_line(data=ifpacketdata, aes(x=s_offset, y=rx, colour=interaction(testname, node, name, "rx"), group=interaction(testname, node, name, "rx")), alpha=0.2, na.rm=TRUE) + - geom_point(data=ifpacketdata, - aes(x=s_offset, y=rx, colour=interaction(testname, node, name, "rx"), - group=interaction(testname, node, name, "rx")), - alpha=0.5, size=0.5, na.rm=TRUE) + geom_line(data=podbootdata, aes(x=s_offset, y=n_pods*ip_scale, colour=interaction(testname,"pod count"), group=testname), alpha=0.2) + - geom_point(data=podbootdata, - aes(x=s_offset, y=n_pods*ip_scale, colour=interaction(testname,"pod count"), group=testname), - alpha=0.3, size=0.5) + labs(colour="") + xlab("seconds") + ylab("packets") + @@ -579,6 +590,22 @@ interface_packet_line_plot <- ggplot() + ggtitle("interface packets") + theme(axis.text.x=element_text(angle=90)) +if (skip_points == 0 ) { + interface_packet_line_plot = interface_packet_line_plot + + geom_point(data=ifpacketdata, + aes(x=s_offset, y=tx, colour=interaction(testname, node, name, "tx"), + group=interaction(testname, node, name, "tx")), + alpha=0.5, size=0.5, na.rm=TRUE) + + geom_point(data=ifpacketdata, + aes(x=s_offset, y=rx, colour=interaction(testname, node, name, "rx"), + group=interaction(testname, node, name, "rx")), + alpha=0.5, size=0.5, na.rm=TRUE) + + geom_point(data=podbootdata, + aes(x=s_offset, y=n_pods*ip_scale, colour=interaction(testname,"pod count"), + group=testname), + alpha=0.3, size=0.5) +} + oct_scale = max(c(max(ifoctetdata$tx, na.rm=TRUE), max(ifoctetdata$rx, na.rm=TRUE))) / max(podbootdata$n_pods) interface_octet_line_plot <- ggplot() + @@ -586,24 +613,13 @@ interface_octet_line_plot <- ggplot() + aes(x=s_offset, y=tx, colour=interaction(testname, node, name, "tx"), group=interaction(testname, node, name, "tx")), 
alpha=0.2, na.rm=TRUE) + - geom_point(data=ifoctetdata, - aes(x=s_offset, y=tx, colour=interaction(testname, node, name, "tx"), - group=interaction(testname, node, name, "tx")), - alpha=0.5, size=0.5, na.rm=TRUE) + geom_line(data=ifoctetdata, aes(x=s_offset, y=rx, colour=interaction(testname, node, name, "rx"), group=interaction(testname, node, name, "rx")), alpha=0.2, na.rm=TRUE) + - geom_point(data=ifoctetdata, - aes(x=s_offset, y=rx, colour=interaction(testname, node, name, "rx"), - group=interaction(testname, node, name, "rx")), - alpha=0.5, size=0.5, na.rm=TRUE) + geom_line(data=podbootdata, aes(x=s_offset, y=n_pods*oct_scale, colour=interaction(testname,"pod count"), group=testname), alpha=0.2) + - geom_point(data=podbootdata, - aes(x=s_offset, y=n_pods*oct_scale, colour=interaction(testname,"pod count"), group=testname), - alpha=0.3, size=0.5) + labs(colour="") + xlab("seconds") + ylab("octets") + @@ -611,6 +627,22 @@ interface_octet_line_plot <- ggplot() + ggtitle("interface octets") + theme(axis.text.x=element_text(angle=90)) +if (skip_points == 0 ) { + interface_octet_line_plot = interface_octet_line_plot + + geom_point(data=ifoctetdata, + aes(x=s_offset, y=tx, colour=interaction(testname, node, name, "tx"), + group=interaction(testname, node, name, "tx")), + alpha=0.5, size=0.5, na.rm=TRUE) + + geom_point(data=ifoctetdata, + aes(x=s_offset, y=rx, colour=interaction(testname, node, name, "rx"), + group=interaction(testname, node, name, "rx")), + alpha=0.5, size=0.5, na.rm=TRUE) + + geom_point(data=podbootdata, + aes(x=s_offset, y=n_pods*oct_scale, colour=interaction(testname,"pod count"), + group=testname), + alpha=0.3, size=0.5) +} + page5 = grid.arrange( interface_packet_line_plot, interface_octet_line_plot, @@ -630,24 +662,13 @@ interface_drop_line_plot <- ggplot() + aes(x=s_offset, y=tx, colour=interaction(testname, node, name, "tx"), group=interaction(testname, node, name, "tx")), alpha=0.2, na.rm=TRUE) + - geom_point(data=ifdropdata, - 
aes(x=s_offset, y=tx, colour=interaction(testname, node, name, "tx"), - group=interaction(testname, node, name, "tx")), - alpha=0.5, size=0.5, na.rm=TRUE) + geom_line(data=ifdropdata, aes(x=s_offset, y=rx, colour=interaction(testname, node, name, "rx"), group=interaction(testname, node, name, "rx")), alpha=0.2, na.rm=TRUE) + - geom_point(data=ifdropdata, - aes(x=s_offset, y=rx, colour=interaction(testname, node, name, "rx"), - group=interaction(testname, node, name, "rx")), - alpha=0.5, size=0.5, na.rm=TRUE) + geom_line(data=podbootdata, aes(x=s_offset, y=n_pods*drop_scale, colour=interaction(testname,"pod count"), group=testname), alpha=0.2) + - geom_point(data=podbootdata, - aes(x=s_offset, y=n_pods*drop_scale, colour=interaction(testname,"pod count"), group=testname), - alpha=0.3, size=0.5) + labs(colour="") + xlab("seconds") + ylab("drops") + @@ -655,6 +676,22 @@ interface_drop_line_plot <- ggplot() + ggtitle("interface drops") + theme(axis.text.x=element_text(angle=90)) +if (skip_points == 0 ) { + interface_drop_line_plot = interface_drop_line_plot + + geom_point(data=ifdropdata, + aes(x=s_offset, y=tx, colour=interaction(testname, node, name, "tx"), + group=interaction(testname, node, name, "tx")), + alpha=0.5, size=0.5, na.rm=TRUE) + + geom_point(data=ifdropdata, + aes(x=s_offset, y=rx, colour=interaction(testname, node, name, "rx"), + group=interaction(testname, node, name, "rx")), + alpha=0.5, size=0.5, na.rm=TRUE) + + geom_point(data=podbootdata, + aes(x=s_offset, y=n_pods*drop_scale, colour=interaction(testname,"pod count"), + group=testname), + alpha=0.3, size=0.5) +} + # errors are often 0, so providing 1 so we won't scale by infinity error_scale = max(c(1, max(iferrordata$tx, na.rm=TRUE), @@ -664,24 +701,13 @@ interface_error_line_plot <- ggplot() + aes(x=s_offset, y=tx, colour=interaction(testname, node, name, "tx"), group=interaction(testname, node, name, name, "tx")), alpha=0.2, na.rm=TRUE) + - geom_point(data=iferrordata, - aes(x=s_offset, y=tx, 
colour=interaction(testname, node, name, "tx"), - group=interaction(testname, node, name, "tx")), - alpha=0.5, size=0.5, na.rm=TRUE) + geom_line(data=iferrordata, aes(x=s_offset, y=rx, colour=interaction(testname, node, name, "rx"), group=interaction(testname, node, name, "rx")), alpha=0.2, na.rm=TRUE) + - geom_point(data=iferrordata, - aes(x=s_offset, y=rx, colour=interaction(testname, node, name, "rx"), - group=interaction(testname, node, name, "rx")), - alpha=0.5, size=0.5, na.rm=TRUE) + geom_line(data=podbootdata, aes(x=s_offset, y=n_pods*error_scale, colour=interaction(testname,"pod count"), group=testname), alpha=0.2) + - geom_point(data=podbootdata, - aes(x=s_offset, y=n_pods*error_scale, colour=interaction(testname,"pod count"), group=testname), - alpha=0.3, size=0.5) + labs(colour="") + xlab("seconds") + ylab("errors") + @@ -689,6 +715,22 @@ interface_error_line_plot <- ggplot() + ggtitle("interface errors") + theme(axis.text.x=element_text(angle=90)) +if (skip_points == 0 ) { + interface_error_line_plot = interface_error_line_plot + + geom_point(data=iferrordata, + aes(x=s_offset, y=tx, colour=interaction(testname, node, name, "tx"), + group=interaction(testname, node, name, "tx")), + alpha=0.5, size=0.5, na.rm=TRUE) + + geom_point(data=iferrordata, + aes(x=s_offset, y=rx, colour=interaction(testname, node, name, "rx"), + group=interaction(testname, node, name, "rx")), + alpha=0.5, size=0.5, na.rm=TRUE) + + geom_point(data=podbootdata, + aes(x=s_offset, y=n_pods*error_scale, colour=interaction(testname,"pod count"), + group=testname), + alpha=0.3, size=0.5) +} + page6 = grid.arrange( interface_drop_line_plot, interface_error_line_plot, From e10bd95ef1b1f0353f81fb5188d3f4b596f679ed Mon Sep 17 00:00:00 2001 From: Graham Whaley Date: Fri, 29 Nov 2019 11:16:33 +0000 Subject: [PATCH 2/3] metrics: collected: use linear regression for cpu calc We were using a simple last-first calculation to work out the per-pod cpu overhead - but, the cpu data is pretty noisy, 
so it was very easy to hit a peak or trough at one or either end, and throw the result out. Try to use a linear regression to find the trend of the slope, and work out the intercept points for the first and last sample, and thus the linear cpu reduction. Update the markdown to place the CPU data notes next to the CPU graphs. Note, this only really works for workloads that are linear, but, that was true for the previous method as well, so is no worse. If the regression model fails, then `NA` will appear in the stats table, which is probably better than printing untrue data. Signed-off-by: Graham Whaley --- .../report_dockerfile/collectd_scaling.R | 27 ++++++++++++++++++- .../report_dockerfile/metrics_report.Rmd | 2 -- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/metrics/report/report_dockerfile/collectd_scaling.R b/metrics/report/report_dockerfile/collectd_scaling.R index f4143c65..34a0b224 100755 --- a/metrics/report/report_dockerfile/collectd_scaling.R +++ b/metrics/report/report_dockerfile/collectd_scaling.R @@ -317,7 +317,27 @@ for (currentdir in resultdirs) { } min_idle_cpu=node_cpu_idle_data$value[cpu_end_index] - cputotal = cputotal + (max_idle_cpu - min_idle_cpu) + # Use a linear regression model to try and guesstimate the CPU + # cost per pod. + # We used to use the formula: + # cputotal = cputotal + (max_idle_cpu - min_idle_cpu) + # to examine the difference from the first and last sample, but, the data + # for cpu is quite noisy. This could easily lead to the first/last samples + # being sat in a peak or trough, and thus throwing out the actual value. + # Using the linear regression, at least if our measurements are fairly linear + # then maybe we get a more realistic result. 
+ + cpu_lm=lm(value ~ epoch, data=node_cpu_idle_data[cpu_start_index:cpu_end_index,]) + inter=cpu_lm$coefficients["(Intercept)"] + coeff=cpu_lm$coefficients["epoch"] + + # Calculate the theoretical cpu values at the start/end of the pod sequence + # according to the linear model, and work out the difference (how much we have + # reduced over the whole sequence). + start_cpu=inter + (coeff * node_cpu_idle_data[cpu_start_index,]$epoch) + end_cpu=inter + (coeff * node_cpu_idle_data[cpu_end_index,]$epoch) + + cputotal = cputotal + (start_cpu - end_cpu) # get value closest to first pod launch inode_start_index=Position(function(x) x > start_time, node_inode_free_data$epoch) @@ -492,6 +512,11 @@ if (skip_points == 0 ) { alpha=0.3, size=0.5) } + +cat("The CPU usage table is calculated using a Linear Model in order to identify the trend from potentially noisy data. Values of 'NA' indicate a valid model could not be fitted to the data (possibly due to too few samples).\n\n") + +cat("> Note: CPU % is measured as a system whole - 100% represents *all* CPUs on the node.\n\n") + page2 = grid.arrange( cpu_line_plot, cpu_stats_plot, diff --git a/metrics/report/report_dockerfile/metrics_report.Rmd b/metrics/report/report_dockerfile/metrics_report.Rmd index 630365ac..38fb297c 100644 --- a/metrics/report/report_dockerfile/metrics_report.Rmd +++ b/metrics/report/report_dockerfile/metrics_report.Rmd @@ -19,8 +19,6 @@ This [test](https://github.com/clearlinux/cloud-native-setup/metrics/scaling/k8s measures the system memory 'free' reduction, CPU idle %, free inodes, and pod boot time as it launches more and more idle `busybox` pods on a Kubernetes cluster. -> Note: CPU % is measured as a system whole - 100% represents *all* CPUs on the node. 
- ```{r scaling, echo=FALSE, fig.cap="K8S scaling", results='asis'} source('tidy_scaling.R') ``` From 43a18126c8edf2d56a996b46e1419c23ae15d2d0 Mon Sep 17 00:00:00 2001 From: Graham Whaley Date: Thu, 28 Nov 2019 18:06:22 +0000 Subject: [PATCH 3/3] metrics: collected: try to make a better palette For cases where we have <= 3 data sets, and no data set contains more than 9 plot lines, try to construct a more meaningful colour palette that groups the relevant test data together by colour. Signed-off-by: Graham Whaley --- .../report_dockerfile/collectd_scaling.R | 42 ++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/metrics/report/report_dockerfile/collectd_scaling.R b/metrics/report/report_dockerfile/collectd_scaling.R index 34a0b224..b822a58f 100755 --- a/metrics/report/report_dockerfile/collectd_scaling.R +++ b/metrics/report/report_dockerfile/collectd_scaling.R @@ -11,7 +11,8 @@ library(gridExtra) # together. suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable. suppressMessages(library(jsonlite)) # to load the data. suppressMessages(library(scales)) # For de-science notation of axis -library(tibble) # tibbles for tidy data +library(tibble) # tibbles for tidy data +library(RColorBrewer) # So we can match palette to data shape testnames=c( "k8s-rapid.*" @@ -469,6 +470,38 @@ if (skip_points == 0 ) { alpha=0.3, size=0.5) } +# For up to 3 test datasets, we try to construct a more useful colour palette +# to group the test data together by colour; otherwise use_palette stays 0. +datasets=unique(memfreedata$testname) +palettes=c("Reds", "Greens", "Blues") +pal=c() +count=1 +use_palette=0 +if (length(datasets) <= length(palettes) ) { + use_palette=1 + for (d in datasets) { + data=memfreedata[memfreedata$testname==d,] + nodes=length(unique(data$node)) + + # We can only handle up to 9 colours per test set. If we find more than that + # for any data set, abort the palette use, as we must have an n:n palette to + # apply to the plot. 
+ if (nodes + 1 > 9) { use_palette=0 } # +1: the "pod count" line needs a colour too + + # Grab the 'full' palette. If we ask for just 'n', then they get spread evenly + # from full dark to dim, and with our alpha value, the dim become unreadable. + # reverse the list, as they come out dim first, and we want 'strong' first. + p = rev(brewer.pal(9, palettes[count])) + pal = c(pal, p[1:(nodes+1)]) + + count = count + 1 + } +} + +if (use_palette == 1) { + mem_line_plot = mem_line_plot + scale_color_manual(values = pal) +} + page1 = grid.arrange( mem_line_plot, mem_stats_plot, @@ -512,6 +545,9 @@ if (skip_points == 0 ) { alpha=0.3, size=0.5) } +if (use_palette == 1) { + cpu_line_plot = cpu_line_plot + scale_color_manual(values = pal) +} cat("The CPU usage table is calculated using a Linear Model in order to identify the trend from potentially noisy data. Values of 'NA' indicate a valid model could not be fitted to the data (possibly due to too few samples).\n\n") @@ -584,6 +620,10 @@ if (skip_points == 0 ) { alpha=0.3, size=0.5) } +if (use_palette == 1) { + inode_line_plot = inode_line_plot + scale_color_manual(values = pal) +} + page4 = grid.arrange( inode_line_plot, inode_stats_plot,