From 26acb2039134fdc230ad67fed221a99986e4888d Mon Sep 17 00:00:00 2001 From: pawel Date: Mon, 30 Mar 2020 12:35:05 +0200 Subject: [PATCH 001/102] init --- dvc/cli.py | 2 + dvc/command/viz.py | 27 +++++++++ dvc/repo/__init__.py | 1 + dvc/repo/init.py | 3 + dvc/repo/viz.py | 18 ++++++ dvc/visualization.py | 141 +++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 192 insertions(+) create mode 100644 dvc/command/viz.py create mode 100644 dvc/repo/viz.py create mode 100644 dvc/visualization.py diff --git a/dvc/cli.py b/dvc/cli.py index 0e4a22a25c..4eee87e9f9 100644 --- a/dvc/cli.py +++ b/dvc/cli.py @@ -3,6 +3,7 @@ import logging import sys +from dvc.command import viz from .command import ( add, cache, @@ -74,6 +75,7 @@ version, update, git_hook, + viz, ] diff --git a/dvc/command/viz.py b/dvc/command/viz.py new file mode 100644 index 0000000000..3d4a664d98 --- /dev/null +++ b/dvc/command/viz.py @@ -0,0 +1,27 @@ +import argparse +import logging + +from dvc.command.base import append_doc_link, CmdBase + +logger = logging.getLogger(__name__) + + +class CmdViz(CmdBase): + def run(self): + self.repo.viz(self.args.targets) + + +def add_parser(subparsers, parent_parser): + VIZ_HELP = "Visualize target metric file using vega.io" + + viz_parser = subparsers.add_parser( + "viz", + parents=[parent_parser], + description=append_doc_link(VIZ_HELP, "viz"), + help=VIZ_HELP, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + viz_parser.add_argument( + "targets", nargs="+", help="Metrics file to visualize." + ) + viz_parser.set_defaults(func=CmdViz) diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index 33431fa4f0..5d23f18919 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -61,6 +61,7 @@ class Repo(object): from dvc.repo.get import get from dvc.repo.get_url import get_url from dvc.repo.update import update + from dvc.repo.viz import viz def __init__(self, root_dir=None): from dvc.state import State diff --git a/dvc/repo/init.py b/dvc/repo/init.py index 3238bb8e94..ef51c3fb88 100644 --- a/dvc/repo/init.py +++ b/dvc/repo/init.py @@ -12,6 +12,7 @@ from dvc.utils import boxify from dvc.utils import relpath from dvc.utils.fs import remove +from dvc.visualization import VisualizationTemplates logger = logging.getLogger(__name__) @@ -100,6 +101,8 @@ def init(root_dir=os.curdir, no_scm=False, force=False, subdir=False): with config.edit() as conf: conf["core"]["no_scm"] = True + VisualizationTemplates.init(dvc_dir) + proj = Repo(root_dir) scm.add([config.files["repo"]]) diff --git a/dvc/repo/viz.py b/dvc/repo/viz.py new file mode 100644 index 0000000000..f5c0b9853f --- /dev/null +++ b/dvc/repo/viz.py @@ -0,0 +1,18 @@ +import logging + +from dvc.visualization import Default1DArrayTemplate + +logger = logging.getLogger(__name__) + + +def viz(repo, targets, typ="csv", viz_template=None): + if typ == "csv": + import csv + + with open(targets[0], "r") as fd: + rdr = csv.reader(fd, delimiter=",") + lines = list(rdr) + assert len(lines) == 1 + values = lines[0] + + Default1DArrayTemplate(repo.dvc_dir).save(values) diff --git a/dvc/visualization.py b/dvc/visualization.py new file mode 100644 index 0000000000..4cd873e987 --- /dev/null +++ b/dvc/visualization.py @@ -0,0 +1,141 @@ +import json +import logging +import os +import tempfile + +from dvc.utils.fs import makedirs + + +logger = logging.getLogger(__name__) + + +class AbstractTemplate: + HTML_TEMPLATE = """ + + + Embedding Vega-Lite + + + + + +
+ + + +""" + + TEMPLATES_DIR = "visualisation" + INDENT = 4 + SEPARATORS = (",", ": ") + + def __init__(self, dvc_dir): + self.dvc_dir = dvc_dir + self.visualization_dir = os.path.join(dvc_dir, self.TEMPLATES_DIR) + + def dump(self): + import json + + makedirs(self.visualization_dir, exist_ok=True) + + with open( + os.path.join(self.visualization_dir, self.TEMPLATE_NAME), "w+" + ) as fd: + json.dump( + self.DEFAULT_CONTENT, + fd, + indent=self.INDENT, + separators=self.SEPARATORS, + ) + + def load(self): + import json + + with open( + os.path.join(self.visualization_dir, self.TEMPLATE_NAME), "r" + ) as fd: + return json.load(fd) + + def fill(self, data): + raise NotImplementedError + + def save(self, data): + + vega_json = self.fill(data) + + tmp_dir = tempfile.mkdtemp("dvc-viz") + path = os.path.join(tmp_dir, "vis.html") + with open(path, "w") as fd: + fd.write(self.HTML_TEMPLATE.format(vega_json=vega_json)) + + logger.error("PATH: {}".format(path)) + + +class Default1DArrayTemplate(AbstractTemplate): + def fill(self, data): + assert isinstance(data, list) + assert not isinstance(data[0], list) + with open( + os.path.join(self.visualization_dir, self.TEMPLATE_NAME), "r" + ) as fd: + content = json.load(fd) + + data_entry_template = '{{"x":{},"y":{}}},' + to_inject = "[" + for index, v in enumerate(data): + to_inject += data_entry_template.format(index, v) + to_inject += "]" + + content["data"][0]["values"] = to_inject + return str(content) + + TEMPLATE_NAME = "default_1d_array.json" + DEFAULT_CONTENT = { + "$schema": "https://vega.github.io/schema/vega/v5.json", + "width": 500, + "height": 500, + "data": [{"name": "table", "values": ""}], + "scales": [ + { + "name": "x", + "type": "point", + "range": "width", + "domain": {"data": "table", "field": "x"}, + }, + { + "name": "y", + "type": "linear", + "range": "height", + "domain": {"data": "table", "field": "y"}, + }, + ], + "axes": [ + {"orient": "bottom", "scale": "x"}, + {"orient": "left", "scale": "y"}, + ], + "marks": [ + { + "type": "line", + "from": {"data": "table"}, + "encode": { + "enter": { + "x": {"scale": "x", "field": "x"}, + "y": {"scale": "y", "field": "y"}, + "strokeWidth": {"value": 3}, + } + }, + } + ], + } + + +TEMPLATES = [Default1DArrayTemplate] + + +class VisualizationTemplates: + @staticmethod + def init(dvc_dir): + [t(dvc_dir).dump() for t in TEMPLATES] From 61c2b5d924babf4f9e599b3b3824821e21a426bf Mon Sep 17 00:00:00 2001 From: pawel Date: Tue, 31 Mar 2020 15:38:33 +0200 Subject: [PATCH 002/102] rename to plot data insertion basig on dicts update --- dvc/cli.py | 4 +- dvc/command/{viz.py => plot.py} | 18 ++-- dvc/plot.py | 104 +++++++++++++++++++++++ dvc/repo/__init__.py | 2 +- dvc/repo/init.py | 4 +- dvc/repo/plot.py | 35 ++++++++ dvc/repo/viz.py | 18 ---- dvc/visualization.py | 141 -------------------------------- 8 files changed, 153 insertions(+), 173 deletions(-) rename dvc/command/{viz.py => plot.py} (51%) create mode 100644 dvc/plot.py create mode 100644 dvc/repo/plot.py delete mode 100644 dvc/repo/viz.py delete mode 100644 dvc/visualization.py diff --git a/dvc/cli.py b/dvc/cli.py index 4eee87e9f9..99a6f1e9bb 100644 --- a/dvc/cli.py +++ b/dvc/cli.py @@ -3,7 +3,7 @@ import logging import sys -from dvc.command import viz +from dvc.command import plot from .command import ( add, cache, @@ -75,7 +75,7 @@ version, update, git_hook, - viz, + plot, ] diff --git a/dvc/command/viz.py b/dvc/command/plot.py similarity index 51% rename from dvc/command/viz.py rename to dvc/command/plot.py index 3d4a664d98..ddb155d465 100644 --- a/dvc/command/viz.py +++ b/dvc/command/plot.py @@ -6,22 +6,22 @@ logger = logging.getLogger(__name__) -class CmdViz(CmdBase): +class CmdPlot(CmdBase): def run(self): - self.repo.viz(self.args.targets) + self.repo.plot(self.args.targets) def add_parser(subparsers, parent_parser): - VIZ_HELP = "Visualize target metric file using vega.io" + PLOT_HELP = "Visualize target metric file using vega.io" - viz_parser = subparsers.add_parser( - "viz", + plot_parser = subparsers.add_parser( + "plot", parents=[parent_parser], - description=append_doc_link(VIZ_HELP, "viz"), - help=VIZ_HELP, + description=append_doc_link(PLOT_HELP, "plot"), + help=PLOT_HELP, formatter_class=argparse.RawDescriptionHelpFormatter, ) - viz_parser.add_argument( + plot_parser.add_argument( "targets", nargs="+", help="Metrics file to visualize." ) - viz_parser.set_defaults(func=CmdViz) + plot_parser.set_defaults(func=CmdPlot) diff --git a/dvc/plot.py b/dvc/plot.py new file mode 100644 index 0000000000..630314edcf --- /dev/null +++ b/dvc/plot.py @@ -0,0 +1,104 @@ +import json +import logging +import os + +from dvc.utils.fs import makedirs + + +logger = logging.getLogger(__name__) + + +class AbstractTemplate: + HTML_TEMPLATE = """ + + + Embedding Vega-Lite + + + + + +
+ + + +""" + + TEMPLATES_DIR = "plot" + INDENT = 4 + SEPARATORS = (",", ": ") + + def __init__(self, dvc_dir): + self.dvc_dir = dvc_dir + self.plot_templates_dir = os.path.join(dvc_dir, self.TEMPLATES_DIR) + + def dump(self): + import json + + makedirs(self.plot_templates_dir, exist_ok=True) + + with open( + os.path.join(self.plot_templates_dir, self.TEMPLATE_NAME), "w+" + ) as fd: + json.dump( + self.DEFAULT_CONTENT, + fd, + indent=self.INDENT, + separators=self.SEPARATORS, + ) + + def load(self): + import json + + with open( + os.path.join(self.plot_templates_dir, self.TEMPLATE_NAME), "r" + ) as fd: + return json.load(fd) + + def fill(self, data): + raise NotImplementedError + + def save(self, update_dict, path): + vega_dict = self.fill(update_dict) + + with open(path, "w") as fd: + fd.write(self.HTML_TEMPLATE.format(vega_json=vega_dict)) + + logger.error("PATH: {}".format(path)) + + +class DefaultTemplate(AbstractTemplate): + TEMPLATE_NAME = "default.json" + + DEFAULT_CONTENT = { + "$schema": "https://vega.github.io/schema/vega-lite/v4.json", + "title": "", + "data": {"values": []}, + "mark": {"type": "line"}, + "encoding": { + "x": {"field": "x", "type": "quantitative"}, + "y": {"field": "y", "type": "quantitative"}, + "color": {"field": "revision", "type": "nominal"}, + }, + } + + def fill(self, update_dict): + with open( + os.path.join(self.plot_templates_dir, self.TEMPLATE_NAME), "r" + ) as fd: + vega_spec = json.load(fd) + + vega_spec.update(update_dict) + return vega_spec + + +TEMPLATES = [DefaultTemplate] + + +class PlotTemplates: + @staticmethod + def init(dvc_dir): + [t(dvc_dir).dump() for t in TEMPLATES] diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index 5d23f18919..879c2dd6b8 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -61,7 +61,7 @@ class Repo(object): from dvc.repo.get import get from dvc.repo.get_url import get_url from dvc.repo.update import update - from dvc.repo.viz import viz + from dvc.repo.plot import plot def __init__(self, root_dir=None): from dvc.state import State diff --git a/dvc/repo/init.py b/dvc/repo/init.py index ef51c3fb88..d8e5691e24 100644 --- a/dvc/repo/init.py +++ b/dvc/repo/init.py @@ -12,7 +12,7 @@ from dvc.utils import boxify from dvc.utils import relpath from dvc.utils.fs import remove -from dvc.visualization import VisualizationTemplates +from dvc.plot import PlotTemplates logger = logging.getLogger(__name__) @@ -101,7 +101,7 @@ def init(root_dir=os.curdir, no_scm=False, force=False, subdir=False): with config.edit() as conf: conf["core"]["no_scm"] = True - VisualizationTemplates.init(dvc_dir) + PlotTemplates.init(dvc_dir) proj = Repo(root_dir) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py new file mode 100644 index 0000000000..1e008d6ce1 --- /dev/null +++ b/dvc/repo/plot.py @@ -0,0 +1,35 @@ +import logging +import os + +from dvc.plot import DefaultTemplate + +logger = logging.getLogger(__name__) + + +def create_data_dict(target, typ): + result = {} + if typ == "json": + import json + + with open(target, "r+") as fd: + data = json.load(fd) + for d in data: + d["revision"] = "HEAD" + + result["data"] = {} + result["data"]["values"] = data + result["title"] = target + return result + + +def plot(repo, targets, typ="json"): + # TODO how to handle multiple targets + target = targets[0] + vega_data_dict = create_data_dict(target, typ) + + # TODO need to pass title, probably need a way to pass additional config + # from json file + + DefaultTemplate(repo.dvc_dir).save( + vega_data_dict, os.path.join(repo.root_dir, "viz.html") + ) diff --git a/dvc/repo/viz.py b/dvc/repo/viz.py deleted file mode 100644 index f5c0b9853f..0000000000 --- a/dvc/repo/viz.py +++ /dev/null @@ -1,18 +0,0 @@ -import logging - -from dvc.visualization import Default1DArrayTemplate - -logger = logging.getLogger(__name__) - - -def viz(repo, targets, typ="csv", viz_template=None): - if typ == "csv": - import csv - - with open(targets[0], "r") as fd: - rdr = csv.reader(fd, delimiter=",") - lines = list(rdr) - assert len(lines) == 1 - values = lines[0] - - Default1DArrayTemplate(repo.dvc_dir).save(values) diff --git a/dvc/visualization.py b/dvc/visualization.py deleted file mode 100644 index 4cd873e987..0000000000 --- a/dvc/visualization.py +++ /dev/null @@ -1,141 +0,0 @@ -import json -import logging -import os -import tempfile - -from dvc.utils.fs import makedirs - - -logger = logging.getLogger(__name__) - - -class AbstractTemplate: - HTML_TEMPLATE = """ - - - Embedding Vega-Lite - - - - - -
- - - -""" - - TEMPLATES_DIR = "visualisation" - INDENT = 4 - SEPARATORS = (",", ": ") - - def __init__(self, dvc_dir): - self.dvc_dir = dvc_dir - self.visualization_dir = os.path.join(dvc_dir, self.TEMPLATES_DIR) - - def dump(self): - import json - - makedirs(self.visualization_dir, exist_ok=True) - - with open( - os.path.join(self.visualization_dir, self.TEMPLATE_NAME), "w+" - ) as fd: - json.dump( - self.DEFAULT_CONTENT, - fd, - indent=self.INDENT, - separators=self.SEPARATORS, - ) - - def load(self): - import json - - with open( - os.path.join(self.visualization_dir, self.TEMPLATE_NAME), "r" - ) as fd: - return json.load(fd) - - def fill(self, data): - raise NotImplementedError - - def save(self, data): - - vega_json = self.fill(data) - - tmp_dir = tempfile.mkdtemp("dvc-viz") - path = os.path.join(tmp_dir, "vis.html") - with open(path, "w") as fd: - fd.write(self.HTML_TEMPLATE.format(vega_json=vega_json)) - - logger.error("PATH: {}".format(path)) - - -class Default1DArrayTemplate(AbstractTemplate): - def fill(self, data): - assert isinstance(data, list) - assert not isinstance(data[0], list) - with open( - os.path.join(self.visualization_dir, self.TEMPLATE_NAME), "r" - ) as fd: - content = json.load(fd) - - data_entry_template = '{{"x":{},"y":{}}},' - to_inject = "[" - for index, v in enumerate(data): - to_inject += data_entry_template.format(index, v) - to_inject += "]" - - content["data"][0]["values"] = to_inject - return str(content) - - TEMPLATE_NAME = "default_1d_array.json" - DEFAULT_CONTENT = { - "$schema": "https://vega.github.io/schema/vega/v5.json", - "width": 500, - "height": 500, - "data": [{"name": "table", "values": ""}], - "scales": [ - { - "name": "x", - "type": "point", - "range": "width", - "domain": {"data": "table", "field": "x"}, - }, - { - "name": "y", - "type": "linear", - "range": "height", - "domain": {"data": "table", "field": "y"}, - }, - ], - "axes": [ - {"orient": "bottom", "scale": "x"}, - {"orient": "left", "scale": "y"}, - ], - "marks": [ - { - "type": "line", - "from": {"data": "table"}, - "encode": { - "enter": { - "x": {"scale": "x", "field": "x"}, - "y": {"scale": "y", "field": "y"}, - "strokeWidth": {"value": 3}, - } - }, - } - ], - } - - -TEMPLATES = [Default1DArrayTemplate] - - -class VisualizationTemplates: - @staticmethod - def init(dvc_dir): - [t(dvc_dir).dump() for t in TEMPLATES] From 7a65199a4c5a86035a912d0e41ea1f905cf9c184 Mon Sep 17 00:00:00 2001 From: pawel Date: Tue, 31 Mar 2020 19:29:38 +0200 Subject: [PATCH 003/102] revision support --- dvc/command/plot.py | 12 ++++++- dvc/plot.py | 27 +++----------- dvc/repo/plot.py | 86 +++++++++++++++++++++++++++++++++++++-------- 3 files changed, 87 insertions(+), 38 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index ddb155d465..5a90979ea9 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -8,7 +8,7 @@ class CmdPlot(CmdBase): def run(self): - self.repo.plot(self.args.targets) + self.repo.plot(self.args.targets, self.args.a_rev, self.args.b_rev) def add_parser(subparsers, parent_parser): @@ -21,6 +21,16 @@ def add_parser(subparsers, parent_parser): help=PLOT_HELP, formatter_class=argparse.RawDescriptionHelpFormatter, ) + plot_parser.add_argument( + "--a_rev", + help="Old Git commit to compare (defaults to HEAD)", + nargs="?", + ) + plot_parser.add_argument( + "--b_rev", + help=("New Git commit to compare (defaults to the current workspace)"), + nargs="?", + ) plot_parser.add_argument( "targets", nargs="+", help="Metrics file to visualize." ) diff --git a/dvc/plot.py b/dvc/plot.py index 630314edcf..8274e6dbcb 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -9,23 +9,6 @@ class AbstractTemplate: - HTML_TEMPLATE = """ - - - Embedding Vega-Lite - - - - - -
- - - -""" TEMPLATES_DIR = "plot" INDENT = 4 @@ -61,13 +44,13 @@ def load(self): def fill(self, data): raise NotImplementedError - def save(self, update_dict, path): - vega_dict = self.fill(update_dict) + # def save(self, update_dict, path): + # vega_dict = self.fill(update_dict) - with open(path, "w") as fd: - fd.write(self.HTML_TEMPLATE.format(vega_json=vega_dict)) + # with open(path, "w") as fd: + # fd.write(self.HTML_TEMPLATE.format(vega_json=vega_dict)) - logger.error("PATH: {}".format(path)) + # logger.error("PATH: {}".format(path)) class DefaultTemplate(AbstractTemplate): diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 1e008d6ce1..9931dd832a 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -1,20 +1,75 @@ +import json import logging -import os from dvc.plot import DefaultTemplate logger = logging.getLogger(__name__) -def create_data_dict(target, typ): +class PageTemplate: + HTML = """ + + + + Embedding Vega-Lite + + + + + + {divs} + + """ + + @staticmethod + def save(divs, path): + page = PageTemplate.HTML.format(divs="\n".join(divs)) + with open(path, "w") as fd: + fd.write(page) + + +class DivTemplate: + HTML = """ +
+ """ + + @staticmethod + def prepare_div(vega_dict): + from shortuuid import uuid + + id = uuid() + return DivTemplate.HTML.format( + id=str(id), + vega_dict=json.dumps(vega_dict, indent=4, separators=(",", ": ")), + ) + + +def _load(tree, target, revision="curren workspace"): + with tree.open(target, "r") as fd: + data = json.load(fd) + for d in data: + d["revision"] = revision + return data + + +def create_data_dict(repo, target, typ, a_rev=None, b_rev=None): result = {} + data = [] if typ == "json": - import json - with open(target, "r+") as fd: - data = json.load(fd) - for d in data: - d["revision"] = "HEAD" + if a_rev and b_rev: + logger.error("diff") + a_tree = repo.scm.get_tree(a_rev) + b_tree = repo.scm.get_tree(b_rev) + logger.error((a_tree, b_tree)) + data.extend(_load(a_tree, target, a_rev)) + data.extend(_load(b_tree, target, b_rev)) + else: + logger.error(str(repo.tree.tree)) + data.extend(_load(repo.tree, target)) result["data"] = {} result["data"]["values"] = data @@ -22,14 +77,15 @@ def create_data_dict(target, typ): return result -def plot(repo, targets, typ="json"): +def plot(repo, targets, a_rev=None, b_rev=None, typ="json"): # TODO how to handle multiple targets - target = targets[0] - vega_data_dict = create_data_dict(target, typ) + logger.error((a_rev, b_rev)) + divs = [] + for target in targets: + vega_data_dict = create_data_dict(repo, target, typ, a_rev, b_rev) - # TODO need to pass title, probably need a way to pass additional config - # from json file + # TODO need to pass title, probably need a way to pass additional conf - DefaultTemplate(repo.dvc_dir).save( - vega_data_dict, os.path.join(repo.root_dir, "viz.html") - ) + vega_dict = DefaultTemplate(repo.dvc_dir).fill(vega_data_dict) + divs.append(DivTemplate.prepare_div(vega_dict)) + PageTemplate.save(divs, "viz.html") From 2715fcd39e6cc5c823b403ad5bb529819f5b313d Mon Sep 17 00:00:00 2001 From: pawel Date: Wed, 1 Apr 2020 11:50:53 +0200 Subject: [PATCH 004/102] roll back revision --- dvc/cli.py | 2 +- dvc/command/plot.py | 19 ++++-------- dvc/plot.py | 35 ++++++--------------- dvc/repo/init.py | 4 +-- dvc/repo/plot.py | 67 ++++++++++++----------------------------- setup.py | 1 + tests/func/test_plot.py | 37 +++++++++++++++++++++++ 7 files changed, 75 insertions(+), 90 deletions(-) create mode 100644 tests/func/test_plot.py diff --git a/dvc/cli.py b/dvc/cli.py index 99a6f1e9bb..98521646a1 100644 --- a/dvc/cli.py +++ b/dvc/cli.py @@ -3,7 +3,6 @@ import logging import sys -from dvc.command import plot from .command import ( add, cache, @@ -36,6 +35,7 @@ update, version, git_hook, + plot, ) from .command.base import fix_subparsers from .exceptions import DvcParserError diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 5a90979ea9..32bfd4c941 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -2,17 +2,20 @@ import logging from dvc.command.base import append_doc_link, CmdBase +from dvc.utils import format_link logger = logging.getLogger(__name__) class CmdPlot(CmdBase): def run(self): - self.repo.plot(self.args.targets, self.args.a_rev, self.args.b_rev) + self.repo.plot(self.args.targets) def add_parser(subparsers, parent_parser): - PLOT_HELP = "Visualize target metric file using vega.io" + PLOT_HELP = "Visualize target metric file using {}.".format( + format_link("https://vega.github.io") + ) plot_parser = subparsers.add_parser( "plot", @@ -22,16 +25,6 @@ def add_parser(subparsers, parent_parser): formatter_class=argparse.RawDescriptionHelpFormatter, ) plot_parser.add_argument( - "--a_rev", - help="Old Git commit to compare (defaults to HEAD)", - nargs="?", - ) - plot_parser.add_argument( - "--b_rev", - help=("New Git commit to compare (defaults to the current workspace)"), - nargs="?", - ) - plot_parser.add_argument( - "targets", nargs="+", help="Metrics file to visualize." + "targets", nargs="+", help="Metric files to visualize." ) plot_parser.set_defaults(func=CmdPlot) diff --git a/dvc/plot.py b/dvc/plot.py index 8274e6dbcb..373840397f 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -1,7 +1,6 @@ import json import logging import os - from dvc.utils.fs import makedirs @@ -9,7 +8,6 @@ class AbstractTemplate: - TEMPLATES_DIR = "plot" INDENT = 4 SEPARATORS = (",", ": ") @@ -33,25 +31,9 @@ def dump(self): separators=self.SEPARATORS, ) - def load(self): - import json - - with open( - os.path.join(self.plot_templates_dir, self.TEMPLATE_NAME), "r" - ) as fd: - return json.load(fd) - def fill(self, data): raise NotImplementedError - # def save(self, update_dict, path): - # vega_dict = self.fill(update_dict) - - # with open(path, "w") as fd: - # fd.write(self.HTML_TEMPLATE.format(vega_json=vega_dict)) - - # logger.error("PATH: {}".format(path)) - class DefaultTemplate(AbstractTemplate): TEMPLATE_NAME = "default.json" @@ -68,7 +50,12 @@ class DefaultTemplate(AbstractTemplate): }, } - def fill(self, update_dict): + def fill(self, data): + assert isinstance(data, list) + assert all({"x", "y", "revision"} == set(d.keys()) for d in data) + + update_dict = {"data": {"values": data}} + with open( os.path.join(self.plot_templates_dir, self.TEMPLATE_NAME), "r" ) as fd: @@ -78,10 +65,6 @@ def fill(self, update_dict): return vega_spec -TEMPLATES = [DefaultTemplate] - - -class PlotTemplates: - @staticmethod - def init(dvc_dir): - [t(dvc_dir).dump() for t in TEMPLATES] +def init_plot_templates(dvc_dir): + templates = [DefaultTemplate] + [t(dvc_dir).dump() for t in templates] diff --git a/dvc/repo/init.py b/dvc/repo/init.py index d8e5691e24..71f9f0d634 100644 --- a/dvc/repo/init.py +++ b/dvc/repo/init.py @@ -6,13 +6,13 @@ from dvc import analytics from dvc.config import Config from dvc.exceptions import InitError, InvalidArgumentError +from dvc.plot import init_plot_templates from dvc.repo import Repo from dvc.scm import SCM from dvc.scm.base import SCMError from dvc.utils import boxify from dvc.utils import relpath from dvc.utils.fs import remove -from dvc.plot import PlotTemplates logger = logging.getLogger(__name__) @@ -101,7 +101,7 @@ def init(root_dir=os.curdir, no_scm=False, force=False, subdir=False): with config.edit() as conf: conf["core"]["no_scm"] = True - PlotTemplates.init(dvc_dir) + init_plot_templates(dvc_dir) proj = Repo(root_dir) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 9931dd832a..dd444ba78b 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -7,19 +7,17 @@ class PageTemplate: - HTML = """ - - - - Embedding Vega-Lite - - - - - - {divs} - - """ + HTML = """ + + dvc plot + + + + + + {divs} + +""" @staticmethod def save(divs, path): @@ -32,7 +30,7 @@ class DivTemplate: HTML = """
""" @@ -43,11 +41,11 @@ def prepare_div(vega_dict): id = uuid() return DivTemplate.HTML.format( id=str(id), - vega_dict=json.dumps(vega_dict, indent=4, separators=(",", ": ")), + vega_json=json.dumps(vega_dict, indent=4, separators=(",", ": ")), ) -def _load(tree, target, revision="curren workspace"): +def _load(tree, target, revision="current workspace"): with tree.open(target, "r") as fd: data = json.load(fd) for d in data: @@ -55,37 +53,10 @@ def _load(tree, target, revision="curren workspace"): return data -def create_data_dict(repo, target, typ, a_rev=None, b_rev=None): - result = {} - data = [] - if typ == "json": - - if a_rev and b_rev: - logger.error("diff") - a_tree = repo.scm.get_tree(a_rev) - b_tree = repo.scm.get_tree(b_rev) - logger.error((a_tree, b_tree)) - data.extend(_load(a_tree, target, a_rev)) - data.extend(_load(b_tree, target, b_rev)) - else: - logger.error(str(repo.tree.tree)) - data.extend(_load(repo.tree, target)) - - result["data"] = {} - result["data"]["values"] = data - result["title"] = target - return result - - -def plot(repo, targets, a_rev=None, b_rev=None, typ="json"): - # TODO how to handle multiple targets - logger.error((a_rev, b_rev)) +def plot(repo, targets, plot_filename="plot.html", typ="json"): divs = [] for target in targets: - vega_data_dict = create_data_dict(repo, target, typ, a_rev, b_rev) - - # TODO need to pass title, probably need a way to pass additional conf - - vega_dict = DefaultTemplate(repo.dvc_dir).fill(vega_data_dict) - divs.append(DivTemplate.prepare_div(vega_dict)) - PageTemplate.save(divs, "viz.html") + data = _load(repo.tree, target) + vega_plot_json = DefaultTemplate(repo.dvc_dir).fill(data) + divs.append(DivTemplate.prepare_div(vega_plot_json)) + PageTemplate.save(divs, plot_filename) diff --git a/setup.py b/setup.py index a77f5b767d..e27b30843e 100644 --- a/setup.py +++ b/setup.py @@ -129,6 +129,7 @@ def run(self): "mock-ssh-server>=0.6.0", "moto==1.3.14.dev464", "rangehttpserver==1.2.0", + "beautifulsoup4==4.4.0", ] if (sys.version_info) >= (3, 6): diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py new file mode 100644 index 0000000000..460d461a4d --- /dev/null +++ b/tests/func/test_plot.py @@ -0,0 +1,37 @@ +import json +from copy import copy + +from bs4 import BeautifulSoup +from funcy import first + +from dvc.plot import DefaultTemplate + + +def _add_revision(data, rev="current workspace"): + new_data = copy(data) + for e in new_data: + e["revision"] = rev + + return new_data + + +def test_plot_vega_compliant_json(tmp_dir, dvc): + metric = [{"x": 1, "y": 2}, {"x": 2, "y": 3}] + tmp_dir.dvc_gen({"metric.json": json.dumps(metric)}) + dvc.metrics.add("metric.json") + + dvc.plot(["metric.json"], "result.html") + + page = tmp_dir / "result.html" + + assert page.exists() + page_content = BeautifulSoup(page.read_text()) + + with_revision = _add_revision(metric) + expected_script_content = json.dumps( + DefaultTemplate(dvc.dvc_dir).fill(with_revision), + indent=4, + separators=(",", ": "), + ) + + assert expected_script_content in first(page_content.body.script.contents) From 46b8e204fac3d40ad097951b4e7431e33a245bc8 Mon Sep 17 00:00:00 2001 From: pawel Date: Wed, 1 Apr 2020 14:45:08 +0200 Subject: [PATCH 005/102] plot makedirs for backward compatibility --- dvc/plot.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dvc/plot.py b/dvc/plot.py index 373840397f..e4ac03b494 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -21,6 +21,9 @@ def dump(self): makedirs(self.plot_templates_dir, exist_ok=True) + if not os.path.exists(self.plot_templates_dir): + makedirs(self.plot_templates_dir) + with open( os.path.join(self.plot_templates_dir, self.TEMPLATE_NAME), "w+" ) as fd: @@ -66,5 +69,6 @@ def fill(self, data): def init_plot_templates(dvc_dir): + templates = [DefaultTemplate] [t(dvc_dir).dump() for t in templates] From bcad3ee251ddc95893e102ea3d2243fc83f29fb7 Mon Sep 17 00:00:00 2001 From: pawel Date: Wed, 1 Apr 2020 14:51:06 +0200 Subject: [PATCH 006/102] log path --- dvc/command/plot.py | 1 + dvc/repo/plot.py | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 32bfd4c941..4d0ca2b3dc 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -10,6 +10,7 @@ class CmdPlot(CmdBase): def run(self): self.repo.plot(self.args.targets) + return 0 def add_parser(subparsers, parent_parser): diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index dd444ba78b..87be89765e 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -53,10 +53,14 @@ def _load(tree, target, revision="current workspace"): return data -def plot(repo, targets, plot_filename="plot.html", typ="json"): +def plot(repo, targets, plot_path="plot.html", typ="json"): divs = [] for target in targets: data = _load(repo.tree, target) vega_plot_json = DefaultTemplate(repo.dvc_dir).fill(data) divs.append(DivTemplate.prepare_div(vega_plot_json)) - PageTemplate.save(divs, plot_filename) + PageTemplate.save(divs, plot_path) + logger.info( + "Your can see your plot by opening '{}' in your " + "browser!".format(plot_path) + ) From 1b3d9251edffe34ac493f8ff473efac1155de6f0 Mon Sep 17 00:00:00 2001 From: pawel Date: Wed, 1 Apr 2020 15:02:54 +0200 Subject: [PATCH 007/102] pretty plot link to visualization page --- dvc/repo/plot.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 87be89765e..080051886d 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -1,7 +1,9 @@ import json import logging +import os from dvc.plot import DefaultTemplate +from dvc.utils import format_link logger = logging.getLogger(__name__) @@ -61,6 +63,10 @@ def plot(repo, targets, plot_path="plot.html", typ="json"): divs.append(DivTemplate.prepare_div(vega_plot_json)) PageTemplate.save(divs, plot_path) logger.info( - "Your can see your plot by opening '{}' in your " - "browser!".format(plot_path) + "Your can see your plot by opening {} in your " + "browser!".format( + format_link( + "file://{}".format(os.path.join(repo.root_dir, plot_path)) + ) + ) ) From 906bd9a4cf69a6253320d6cdf1ce1568af647916 Mon Sep 17 00:00:00 2001 From: pawel Date: Wed, 1 Apr 2020 15:28:35 +0200 Subject: [PATCH 008/102] make target default title --- dvc/plot.py | 6 +++--- dvc/repo/plot.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dvc/plot.py b/dvc/plot.py index e4ac03b494..c852b669bd 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -34,7 +34,7 @@ def dump(self): separators=self.SEPARATORS, ) - def fill(self, data): + def fill(self, data, data_src=""): raise NotImplementedError @@ -53,11 +53,11 @@ class DefaultTemplate(AbstractTemplate): }, } - def fill(self, data): + def fill(self, data, data_src=""): assert isinstance(data, list) assert all({"x", "y", "revision"} == set(d.keys()) for d in data) - update_dict = {"data": {"values": data}} + update_dict = {"data": {"values": data}, "title": data_src} with open( os.path.join(self.plot_templates_dir, self.TEMPLATE_NAME), "r" diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 080051886d..df4713c1a7 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -59,7 +59,7 @@ def plot(repo, targets, plot_path="plot.html", typ="json"): divs = [] for target in targets: data = _load(repo.tree, target) - vega_plot_json = DefaultTemplate(repo.dvc_dir).fill(data) + vega_plot_json = DefaultTemplate(repo.dvc_dir).fill(data, target) divs.append(DivTemplate.prepare_div(vega_plot_json)) PageTemplate.save(divs, plot_path) logger.info( From 7d037677dffea19461a997956bebe3706c819d42 Mon Sep 17 00:00:00 2001 From: pawel Date: Wed, 1 Apr 2020 15:51:59 +0200 Subject: [PATCH 009/102] efiop review --- dvc/repo/plot.py | 67 +++++++++++++++++++++++++----------------------- 1 file changed, 35 insertions(+), 32 deletions(-) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index df4713c1a7..7877e7b05e 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -3,13 +3,12 @@ import os from dvc.plot import DefaultTemplate +from dvc.repo import locked from dvc.utils import format_link logger = logging.getLogger(__name__) - -class PageTemplate: - HTML = """ +PAGE_HTML = """ dvc plot @@ -21,47 +20,51 @@ class PageTemplate: """ - @staticmethod - def save(divs, path): - page = PageTemplate.HTML.format(divs="\n".join(divs)) - with open(path, "w") as fd: - fd.write(page) - - -class DivTemplate: - HTML = """ -
- """ - - @staticmethod - def prepare_div(vega_dict): - from shortuuid import uuid - - id = uuid() - return DivTemplate.HTML.format( - id=str(id), - vega_json=json.dumps(vega_dict, indent=4, separators=(",", ": ")), - ) +DIV_HTML = """
+""" + + +def _save_plot_html(divs, path): + page = PAGE_HTML.format(divs="\n".join(divs)) + with open(path, "w") as fobj: + fobj.write(page) + + +def _prepare_div(vega_dict): + from shortuuid import uuid + + id = uuid() + return DIV_HTML.format( + id=str(id), + vega_json=json.dumps(vega_dict, indent=4, separators=(",", ": ")), + ) def _load(tree, target, revision="current workspace"): - with tree.open(target, "r") as fd: - data = json.load(fd) + with tree.open(target, "r") as fobj: + data = json.load(fobj) for d in data: d["revision"] = revision return data -def plot(repo, targets, plot_path="plot.html", typ="json"): +@locked +def plot(repo, targets, plot_path=None, typ="json"): + + if not plot_path: + plot_path = "plot.html" + divs = [] for target in targets: data = _load(repo.tree, target) vega_plot_json = DefaultTemplate(repo.dvc_dir).fill(data, target) - divs.append(DivTemplate.prepare_div(vega_plot_json)) - PageTemplate.save(divs, plot_path) + divs.append(_prepare_div(vega_plot_json)) + + _save_plot_html(divs, plot_path) + logger.info( "Your can see your plot by opening {} in your " "browser!".format( From d968b34cce376309b92d80fb7493c5eb1ba26c44 Mon Sep 17 00:00:00 2001 From: pawel Date: Wed, 1 Apr 2020 15:53:49 +0200 Subject: [PATCH 010/102] efiop review --- tests/func/test_plot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 460d461a4d..bc9b9378e2 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -29,7 +29,7 @@ def test_plot_vega_compliant_json(tmp_dir, dvc): with_revision = _add_revision(metric) expected_script_content = json.dumps( - DefaultTemplate(dvc.dvc_dir).fill(with_revision), + DefaultTemplate(dvc.dvc_dir).fill(with_revision, "metric.json"), indent=4, separators=(",", ": "), ) From a80f3c9cf8a5352bf7f27f0526b5d6f3b75282fd Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 2 Apr 2020 03:24:37 +0200 Subject: [PATCH 011/102] proper id generation --- dvc/repo/plot.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 7877e7b05e..73f4e8184f 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -1,6 +1,8 @@ import json import logging import os +import random +import string from dvc.plot import DefaultTemplate from dvc.repo import locked @@ -34,9 +36,8 @@ def _save_plot_html(divs, path): def _prepare_div(vega_dict): - from shortuuid import uuid - id = uuid() + id = random.sample(string.ascii_lowercase, 8) return DIV_HTML.format( id=str(id), vega_json=json.dumps(vega_dict, indent=4, separators=(",", ": ")), From ffbfba7847157897c0935651bc7b0565d3034a49 Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 2 Apr 2020 03:28:46 +0200 Subject: [PATCH 012/102] proper id generation --- dvc/repo/plot.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 73f4e8184f..c6a93131c1 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -36,8 +36,7 @@ def _save_plot_html(divs, path): def _prepare_div(vega_dict): - - id = random.sample(string.ascii_lowercase, 8) + id = "".join(random.sample(string.ascii_lowercase, 8)) return DIV_HTML.format( id=str(id), vega_json=json.dumps(vega_dict, indent=4, separators=(",", ": ")), From 1511204367324b49d567ff3601ccad203c81b07d Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 2 Apr 2020 16:10:42 +0200 Subject: [PATCH 013/102] add confusion matrix template --- dvc/command/plot.py | 12 ++++++- dvc/plot.py | 75 +++++++++++++++++++++++++++++------------ dvc/repo/__init__.py | 3 ++ dvc/repo/init.py | 5 +-- dvc/repo/plot.py | 11 ++++-- tests/func/test_plot.py | 43 +++++++++++++++++++++-- 6 files changed, 116 insertions(+), 33 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 4d0ca2b3dc..43ca031333 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -9,7 +9,11 @@ class CmdPlot(CmdBase): def run(self): - self.repo.plot(self.args.targets) + self.repo.plot( + self.args.targets, + plot_path=self.args.path, + template=self.args.template, + ) return 0 @@ -25,6 +29,12 @@ def add_parser(subparsers, parent_parser): help=PLOT_HELP, formatter_class=argparse.RawDescriptionHelpFormatter, ) + plot_parser.add_argument( + "--template", nargs="?", help="Template file to choose." + ) + plot_parser.add_argument( + "--path", nargs="?", help="Path to write plot HTML to." + ) plot_parser.add_argument( "targets", nargs="+", help="Metric files to visualize." ) diff --git a/dvc/plot.py b/dvc/plot.py index c852b669bd..270cfa7ab2 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -1,20 +1,21 @@ import json import logging import os + +from funcy import cached_property + from dvc.utils.fs import makedirs logger = logging.getLogger(__name__) -class AbstractTemplate: - TEMPLATES_DIR = "plot" +class Template: INDENT = 4 SEPARATORS = (",", ": ") - def __init__(self, dvc_dir): - self.dvc_dir = dvc_dir - self.plot_templates_dir = os.path.join(dvc_dir, self.TEMPLATES_DIR) + def __init__(self, templates_dir): + self.plot_templates_dir = templates_dir def dump(self): import json @@ -34,16 +35,25 @@ def dump(self): separators=self.SEPARATORS, ) - def fill(self, data, data_src=""): - raise NotImplementedError + @staticmethod + def fill(template_path, data, data_src=""): + assert isinstance(data, list) + assert all({"x", "y", "revision"} == set(d.keys()) for d in data) + + update_dict = {"data": {"values": data}, "title": data_src} + + with open(template_path, "r") as fd: + vega_spec = json.load(fd) + + vega_spec.update(update_dict) + return vega_spec -class DefaultTemplate(AbstractTemplate): +class DefaultLinearTemplate(Template): TEMPLATE_NAME = "default.json" DEFAULT_CONTENT = { "$schema": "https://vega.github.io/schema/vega-lite/v4.json", - "title": "", "data": {"values": []}, "mark": {"type": "line"}, "encoding": { @@ -53,22 +63,43 @@ class DefaultTemplate(AbstractTemplate): }, } - def fill(self, data, data_src=""): - assert isinstance(data, list) - assert all({"x", "y", "revision"} == set(d.keys()) for d in data) - update_dict = {"data": {"values": data}, "title": data_src} +class DefaultConfusionTemplate(Template): + TEMPLATE_NAME = "default_confusion.json" + DEFAULT_CONTENT = { + "$schema": "https://vega.github.io/schema/vega-lite/v4.json", + "data": {"values": []}, + "mark": "rect", + "encoding": { + "x": { + "field": "x", + "type": "nominal", + "sort": "ascending", + "title": "Predicted value", + }, + "y": { + "field": "y", + "type": "nominal", + "sort": "ascending", + "title": "Actual value", + }, + "color": {"aggregate": "count", "type": "quantitative"}, + }, + } - with open( - os.path.join(self.plot_templates_dir, self.TEMPLATE_NAME), "r" - ) as fd: - vega_spec = json.load(fd) - vega_spec.update(update_dict) - return vega_spec +class PlotTemplates: + TEMPLATES_DIR = "plot" + TEMPLATES = [DefaultLinearTemplate, DefaultConfusionTemplate] + @cached_property + def templates_dir(self): + return os.path.join(self.dvc_dir, self.TEMPLATES_DIR) -def init_plot_templates(dvc_dir): + def __init__(self, dvc_dir): + self.dvc_dir = dvc_dir - templates = [DefaultTemplate] - [t(dvc_dir).dump() for t in templates] + if not os.path.exists(self.templates_dir): + makedirs(self.templates_dir, exist_ok=True) + for t in self.TEMPLATES: + t(self.templates_dir).dump() diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index 879c2dd6b8..0165c794ff 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -16,6 +16,7 @@ OutputNotFoundError, ) from dvc.path_info import PathInfo +from dvc.plot import PlotTemplates from dvc.remote.base import RemoteActionNotImplemented from dvc.utils.fs import path_isin from .graph import check_acyclic, get_pipeline, get_pipelines @@ -113,6 +114,8 @@ def __init__(self, root_dir=None): self._ignore() + self.plot_templates = PlotTemplates(self.dvc_dir) + @property def tree(self): return self._tree diff --git a/dvc/repo/init.py b/dvc/repo/init.py index 71f9f0d634..dda339bbd7 100644 --- a/dvc/repo/init.py +++ b/dvc/repo/init.py @@ -6,7 +6,6 @@ from dvc import analytics from dvc.config import Config from dvc.exceptions import InitError, InvalidArgumentError -from dvc.plot import init_plot_templates from dvc.repo import Repo from dvc.scm import SCM from dvc.scm.base import SCMError @@ -101,11 +100,9 @@ def init(root_dir=os.curdir, no_scm=False, force=False, subdir=False): with config.edit() as conf: conf["core"]["no_scm"] = True - init_plot_templates(dvc_dir) - proj = Repo(root_dir) - scm.add([config.files["repo"]]) + scm.add([config.files["repo"], proj.plot_templates.templates_dir]) if scm.ignore_file: scm.add([os.path.join(dvc_dir, scm.ignore_file)]) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index c6a93131c1..0a39fbd2b3 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -4,7 +4,7 @@ import random import string -from dvc.plot import DefaultTemplate +from dvc.plot import Template from dvc.repo import locked from dvc.utils import format_link @@ -52,15 +52,20 @@ def _load(tree, target, revision="current workspace"): @locked -def plot(repo, targets, plot_path=None, typ="json"): +def plot(repo, targets, plot_path=None, template=None, typ="json"): if not plot_path: plot_path = "plot.html" + if not template: + template = os.path.join( + repo.plot_templates.templates_dir, "default.json" + ) + divs = [] for target in targets: data = _load(repo.tree, target) - vega_plot_json = DefaultTemplate(repo.dvc_dir).fill(data, target) + vega_plot_json = Template.fill(template, data, target) divs.append(_prepare_div(vega_plot_json)) _save_plot_html(divs, plot_path) diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index bc9b9378e2..52d222fad2 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -1,10 +1,11 @@ import json +import os from copy import copy from bs4 import BeautifulSoup from funcy import first -from dvc.plot import DefaultTemplate +from dvc.plot import Template def _add_revision(data, rev="current workspace"): @@ -15,7 +16,7 @@ def _add_revision(data, rev="current workspace"): return new_data -def test_plot_vega_compliant_json(tmp_dir, dvc): +def test_plot_linear(tmp_dir, dvc): metric = [{"x": 1, "y": 2}, {"x": 2, "y": 3}] tmp_dir.dvc_gen({"metric.json": json.dumps(metric)}) dvc.metrics.add("metric.json") @@ -29,7 +30,43 @@ def test_plot_vega_compliant_json(tmp_dir, dvc): with_revision = _add_revision(metric) expected_script_content = json.dumps( - DefaultTemplate(dvc.dvc_dir).fill(with_revision, "metric.json"), + Template.fill( + os.path.join(dvc.plot_templates.templates_dir, "default.json"), + with_revision, + "metric.json", + ), + indent=4, + separators=(",", ": "), + ) + + assert expected_script_content in first(page_content.body.script.contents) + + +def test_plot_confusion(tmp_dir, dvc): + confusion_matrix = [{"x": "B", "y": "A"}, {"x": "A", "y": "A"}] + tmp_dir.dvc_gen({"metric.json": json.dumps(confusion_matrix)}) + dvc.metrics.add("metric.json") + + dvc.plot( + ["metric.json"], + "result.html", + template=os.path.join( + dvc.plot_templates.templates_dir, "default_confusion.json" + ), + ) + + page = tmp_dir / "result.html" + + assert page.exists() + page_content = BeautifulSoup(page.read_text()) + + with_revision = _add_revision(confusion_matrix) + expected_script_content = json.dumps( + Template.fill( + os.path.join(".dvc", "plot", "default_confusion.json"), + with_revision, + "metric.json", + ), indent=4, separators=(",", ": "), ) From 224f3399f2fb238cba024d1012550ef18fe4a1c1 Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 2 Apr 2020 17:12:57 +0200 Subject: [PATCH 014/102] refactor tests --- tests/func/test_plot.py | 94 ++++++++++++++++++++++++++--------------- 1 file changed, 59 insertions(+), 35 deletions(-) diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 52d222fad2..b1ca0761fa 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -1,74 +1,98 @@ import json import os -from copy import copy from bs4 import BeautifulSoup from funcy import first -from dvc.plot import Template +def _run_with_metric(tmp_dir, dvc, metric, metric_filename, commit=None): + tmp_dir.gen({metric_filename: json.dumps(metric)}) + dvc.run(metrics_no_cache=[metric_filename]) + if hasattr(dvc, "scm"): + dvc.scm.add([metric_filename, metric_filename + ".dvc"]) + if commit: + dvc.scm.commit(commit) -def _add_revision(data, rev="current workspace"): - new_data = copy(data) - for e in new_data: - e["revision"] = rev - return new_data - - -def test_plot_linear(tmp_dir, dvc): +def test_plot_linear(tmp_dir, scm, dvc): metric = [{"x": 1, "y": 2}, {"x": 2, "y": 3}] - tmp_dir.dvc_gen({"metric.json": json.dumps(metric)}) - dvc.metrics.add("metric.json") + _run_with_metric(tmp_dir, dvc, metric, "metric.json", "first run") dvc.plot(["metric.json"], "result.html") page = tmp_dir / "result.html" - assert page.exists() page_content = BeautifulSoup(page.read_text()) - with_revision = _add_revision(metric) - expected_script_content = json.dumps( - Template.fill( - os.path.join(dvc.plot_templates.templates_dir, "default.json"), - with_revision, - "metric.json", - ), + expected_vega_json = json.dumps( + { + "$schema": "https://vega.github.io/schema/vega-lite/v4.json", + "data": { + "values": [ + {"x": 1, "y": 2, "revision": "current workspace"}, + {"x": 2, "y": 3, "revision": "current workspace"}, + ] + }, + "mark": {"type": "line"}, + "encoding": { + "x": {"field": "x", "type": "quantitative"}, + "y": {"field": "y", "type": "quantitative"}, + "color": {"field": "revision", "type": "nominal"}, + }, + "title": "metric.json", + }, indent=4, separators=(",", ": "), ) - assert expected_script_content in first(page_content.body.script.contents) + assert expected_vega_json in first(page_content.body.script.contents) def test_plot_confusion(tmp_dir, dvc): confusion_matrix = [{"x": "B", "y": "A"}, {"x": "A", "y": "A"}] - tmp_dir.dvc_gen({"metric.json": json.dumps(confusion_matrix)}) - dvc.metrics.add("metric.json") + _run_with_metric( + tmp_dir, dvc, confusion_matrix, "metric.json", "first run" + ) dvc.plot( ["metric.json"], "result.html", - template=os.path.join( - dvc.plot_templates.templates_dir, "default_confusion.json" - ), + os.path.join(".dvc", "plot", "default_confusion.json"), ) page = tmp_dir / "result.html" - assert page.exists() page_content = BeautifulSoup(page.read_text()) - with_revision = _add_revision(confusion_matrix) - expected_script_content = json.dumps( - Template.fill( - os.path.join(".dvc", "plot", "default_confusion.json"), - with_revision, - "metric.json", - ), + expected_vega_json = json.dumps( + { + "$schema": "https://vega.github.io/schema/vega-lite/v4.json", + "data": { + "values": [ + {"x": "B", "y": "A", "revision": "current workspace"}, + {"x": "A", "y": "A", "revision": "current workspace"}, + ] + }, + "mark": "rect", + "encoding": { + "x": { + "field": "x", + "type": "nominal", + "sort": "ascending", + "title": "Predicted value", + }, + "y": { + "field": "y", + "type": "nominal", + "sort": "ascending", + "title": "Actual value", + }, + "color": {"aggregate": "count", "type": "quantitative"}, + }, + "title": "metric.json", + }, indent=4, separators=(",", ": "), ) - assert expected_script_content in first(page_content.body.script.contents) + assert expected_vega_json in first(page_content.body.script.contents) From fffbbadaac33fe074761a9bd56e74e206659ab53 Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 3 Apr 2020 15:04:23 +0200 Subject: [PATCH 015/102] plot from dvct file --- dvc/command/plot.py | 18 ++++++----- dvc/plot.py | 23 ++++++++++---- dvc/repo/plot.py | 68 ++++++++++++++++++++++++++--------------- tests/func/test_plot.py | 41 +++++++++++-------------- 4 files changed, 89 insertions(+), 61 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 43ca031333..d1de4a340c 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -1,5 +1,6 @@ import argparse import logging +import os from dvc.command.base import append_doc_link, CmdBase from dvc.utils import format_link @@ -9,10 +10,14 @@ class CmdPlot(CmdBase): def run(self): - self.repo.plot( - self.args.targets, - plot_path=self.args.path, - template=self.args.template, + path = self.repo.plot(self.args.target, template=self.args.template,) + logger.info( + "Your can see your plot by opening {} in your " + "browser!".format( + format_link( + "file://{}".format(os.path.join(self.repo.root_dir, path)) + ) + ) ) return 0 @@ -33,9 +38,6 @@ def add_parser(subparsers, parent_parser): "--template", nargs="?", help="Template file to choose." ) plot_parser.add_argument( - "--path", nargs="?", help="Path to write plot HTML to." - ) - plot_parser.add_argument( - "targets", nargs="+", help="Metric files to visualize." + "target", nargs="?", help="Metric files to visualize." ) plot_parser.set_defaults(func=CmdPlot) diff --git a/dvc/plot.py b/dvc/plot.py index 270cfa7ab2..f6103d2492 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -4,6 +4,7 @@ from funcy import cached_property +from dvc.exceptions import DvcException from dvc.utils.fs import makedirs @@ -35,16 +36,26 @@ def dump(self): separators=self.SEPARATORS, ) - @staticmethod - def fill(template_path, data, data_src=""): + def load_template(self, path): + try: + with open(path, "r") as fd: + return json.load(fd) + except FileNotFoundError: + try: + with open( + os.path.join(self.plot_templates_dir, path), "r" + ) as fd: + return json.load(fd) + except FileNotFoundError: + raise DvcException("Not in repo nor in defaults") + + def fill(self, template_path, data, data_src=""): assert isinstance(data, list) assert all({"x", "y", "revision"} == set(d.keys()) for d in data) update_dict = {"data": {"values": data}, "title": data_src} - with open(template_path, "r") as fd: - vega_spec = json.load(fd) - + vega_spec = self.load_template(template_path) vega_spec.update(update_dict) return vega_spec @@ -65,7 +76,7 @@ class DefaultLinearTemplate(Template): class DefaultConfusionTemplate(Template): - TEMPLATE_NAME = "default_confusion.json" + TEMPLATE_NAME = "cf.json" DEFAULT_CONTENT = { "$schema": "https://vega.github.io/schema/vega-lite/v4.json", "data": {"values": []}, diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 0a39fbd2b3..4f9fa6e38d 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -1,12 +1,12 @@ import json import logging -import os import random +import re import string +from dvc.exceptions import DvcException from dvc.plot import Template from dvc.repo import locked -from dvc.utils import format_link logger = logging.getLogger(__name__) @@ -43,7 +43,7 @@ def _prepare_div(vega_dict): ) -def _load(tree, target, revision="current workspace"): +def _load_data(tree, target, revision="current workspace"): with tree.open(target, "r") as fobj: data = json.load(fobj) for d in data: @@ -51,30 +51,50 @@ def _load(tree, target, revision="current workspace"): return data -@locked -def plot(repo, targets, plot_path=None, template=None, typ="json"): +def _parse_plots(path): + with open(path, "r") as fobj: + content = fobj.read() - if not plot_path: - plot_path = "plot.html" + plot_regex = re.compile("") - if not template: - template = os.path.join( - repo.plot_templates.templates_dir, "default.json" - ) + plots = list(plot_regex.findall(content)) + return False, plots - divs = [] - for target in targets: - data = _load(repo.tree, target) - vega_plot_json = Template.fill(template, data, target) - divs.append(_prepare_div(vega_plot_json)) - _save_plot_html(divs, plot_path) +def _parse_plot_str(plot_str): + content = plot_str.replace("<", "") + content = content.replace(">", "") + args = content.split("::")[1:] + if len(args) == 2: + return args + elif len(args) == 1: + return args[0], "default.json" + raise DvcException("Error parsing") - logger.info( - "Your can see your plot by opening {} in your " - "browser!".format( - format_link( - "file://{}".format(os.path.join(repo.root_dir, plot_path)) - ) - ) + +def to_div(repo, plot_str): + datafile, templatefile = _parse_plot_str(plot_str) + + data = _load_data(repo.tree, datafile) + vega_plot_json = Template(repo.plot_templates.templates_dir).fill( + templatefile, data, datafile ) + return _prepare_div(vega_plot_json) + + +@locked +def plot(repo, template_file, revisions=None): + if revisions is None: + revisions = [] + + is_html, plot_strings = _parse_plots(template_file) + m = {plot_str: to_div(repo, plot_str) for plot_str in plot_strings} + + result = template_file.replace(".dvct", ".html") + if not is_html: + _save_plot_html( + [m[p] for p in plot_strings], result, + ) + return result + else: + raise NotImplementedError diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index b1ca0761fa..8ab02ecac8 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -1,5 +1,4 @@ import json -import os from bs4 import BeautifulSoup from funcy import first @@ -14,17 +13,22 @@ def _run_with_metric(tmp_dir, dvc, metric, metric_filename, commit=None): dvc.scm.commit(commit) -def test_plot_linear(tmp_dir, scm, dvc): +# TODO +def test_plot_in_html_file(tmp_dir): + pass + + +def test_plot_in_no_html(tmp_dir, scm, dvc): metric = [{"x": 1, "y": 2}, {"x": 2, "y": 3}] _run_with_metric(tmp_dir, dvc, metric, "metric.json", "first run") - dvc.plot(["metric.json"], "result.html") + template_content = "" + (tmp_dir / "template.dvct").write_text(template_content) - page = tmp_dir / "result.html" - assert page.exists() - page_content = BeautifulSoup(page.read_text()) + result = dvc.plot("template.dvct") - expected_vega_json = json.dumps( + page_content = BeautifulSoup((tmp_dir / result).read_text()) + assert json.dumps( { "$schema": "https://vega.github.io/schema/vega-lite/v4.json", "data": { @@ -43,9 +47,7 @@ def test_plot_linear(tmp_dir, scm, dvc): }, indent=4, separators=(",", ": "), - ) - - assert expected_vega_json in first(page_content.body.script.contents) + ) in first(page_content.body.script.contents) def test_plot_confusion(tmp_dir, dvc): @@ -53,18 +55,13 @@ def test_plot_confusion(tmp_dir, dvc): _run_with_metric( tmp_dir, dvc, confusion_matrix, "metric.json", "first run" ) + template_content = "" + (tmp_dir / "template.dvct").write_text(template_content) - dvc.plot( - ["metric.json"], - "result.html", - os.path.join(".dvc", "plot", "default_confusion.json"), - ) - - page = tmp_dir / "result.html" - assert page.exists() - page_content = BeautifulSoup(page.read_text()) + result = dvc.plot("template.dvct") - expected_vega_json = json.dumps( + page_content = BeautifulSoup((tmp_dir / result).read_text()) + assert json.dumps( { "$schema": "https://vega.github.io/schema/vega-lite/v4.json", "data": { @@ -93,6 +90,4 @@ def test_plot_confusion(tmp_dir, dvc): }, indent=4, separators=(",", ": "), - ) - - assert expected_vega_json in first(page_content.body.script.contents) + ) in first(page_content.body.script.contents) From 66e1151bda8a112ceb6d27426c3f907207dcc0dd Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 2 Apr 2020 02:37:50 +0200 Subject: [PATCH 016/102] plot multiple initial --- dvc/command/plot.py | 90 ++++++++++++++++++++++++++++++++++------- dvc/repo/plot.py | 18 +++++++++ tests/func/test_plot.py | 47 +++++++++++++++++++++ 3 files changed, 141 insertions(+), 14 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index d1de4a340c..56d7d78688 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -2,30 +2,48 @@ import logging import os -from dvc.command.base import append_doc_link, CmdBase +from dvc.command.base import append_doc_link, CmdBase, fix_subparsers +from dvc.exceptions import DvcException from dvc.utils import format_link logger = logging.getLogger(__name__) -class CmdPlot(CmdBase): +class CmdPlotShow(CmdBase): def run(self): - path = self.repo.plot(self.args.target, template=self.args.template,) - logger.info( - "Your can see your plot by opening {} in your " - "browser!".format( - format_link( - "file://{}".format(os.path.join(self.repo.root_dir, path)) + try: + path = self.repo.plot(self.args.targets) + logger.info( + "Your can see your plot by opening {} in your " + "browser!".format( + format_link( + "file://{}".format( + os.path.join(self.repo.root_dir, path)) + ) ) ) - ) + except DvcException: + logger.exception("failed to plot metrics") + return 0 + + +class CmdPlotDiff(CmdBase): + def run(self): + try: + logger.error("Plotting diff") + self.repo.plot( + self.args.targets, revisions=[self.args.a_rev, self.args.b_rev] + ) + + except DvcException: + logger.exception("failed to plot metrics diff") + return 1 + return 0 def add_parser(subparsers, parent_parser): - PLOT_HELP = "Visualize target metric file using {}.".format( - format_link("https://vega.github.io") - ) + PLOT_HELP = "For visualisation" plot_parser = subparsers.add_parser( "plot", @@ -34,10 +52,54 @@ def add_parser(subparsers, parent_parser): help=PLOT_HELP, formatter_class=argparse.RawDescriptionHelpFormatter, ) - plot_parser.add_argument( + plot_subparsers = plot_parser.add_subparsers( + dest="cmd", + help="Use `dvc plot CMD --help` to display command-specific help.", + ) + + fix_subparsers(plot_subparsers) + + SHOW_HELP = "Visualize target metric file using {}.".format( + format_link("https://vega.github.io") + ) + plot_show_parser = plot_subparsers.add_parser( + "show", + parents=[parent_parser], + description=append_doc_link(SHOW_HELP, "plot/show"), + help=SHOW_HELP, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + plot_show_parser.add_argument( "--template", nargs="?", help="Template file to choose." ) plot_parser.add_argument( "target", nargs="?", help="Metric files to visualize." ) - plot_parser.set_defaults(func=CmdPlot) + plot_show_parser.set_defaults(func=CmdPlotShow) + + PLOT_DIFF_HELP = "Plot changes in metrics between commits" + " in the DVC repository, or between a commit and the workspace." + plot_diff_parser = plot_subparsers.add_parser( + "diff", + parents=[parent_parser], + description=append_doc_link(PLOT_DIFF_HELP, "plot/diff"), + help=PLOT_DIFF_HELP, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + plot_diff_parser.add_argument( + "a_rev", nargs="?", help="Old Git commit to plot" + ) + plot_diff_parser.add_argument( + "b_rev", + nargs="?", + help=("New Git commit to plot(defaults to the current workspace)"), + ) + plot_diff_parser.add_argument( + "--targets", + nargs="*", + help=( + "Metric files or directories to plot for. " + "Plots for all metric files by default." + ), + ) + plot_diff_parser.set_defaults(func=CmdPlotDiff) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 4f9fa6e38d..62f1c24144 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -51,6 +51,24 @@ def _load_data(tree, target, revision="current workspace"): return data +def _load_from_rev(repo, revisions, target): + data = [] + if len(revisions) == 0: + if repo.scm.is_dirty(): + data.extend(_load_data(repo.scm.get_tree("HEAD"), target, "HEAD")) + data.extend(_load_data(repo.tree, target)) + logger.error(data) + elif len(revisions) == 1: + data.extend( + _load_data(repo.scm.get_tree(revisions[0]), target, revisions[0]) + ) + data.extend(_load_data(repo.tree, target)) + else: + for rev in revisions: + data.extend(_load_data(repo.scm.get_tree(rev), target, rev)) + return data + + def _parse_plots(path): with open(path, "r") as fobj: content = fobj.read() diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 8ab02ecac8..11a9dd8a1a 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -1,4 +1,5 @@ import json +from copy import copy from bs4 import BeautifulSoup from funcy import first @@ -13,6 +14,18 @@ def _run_with_metric(tmp_dir, dvc, metric, metric_filename, commit=None): dvc.scm.commit(commit) +def _add_revision(data, rev="current workspace"): + new_data = copy(data) + for e in new_data: + e["revision"] = rev + + +def to_data(rev_data): + result = [] + for key, data in rev_data.items(): + result.extend(_add_revision(data, key)) + return result + # TODO def test_plot_in_html_file(tmp_dir): pass @@ -91,3 +104,37 @@ def test_plot_confusion(tmp_dir, dvc): indent=4, separators=(",", ": "), ) in first(page_content.body.script.contents) + + +def test_plot_multiple_revisions(tmp_dir, scm, dvc): + metric_1 = [{"x": 1, "y": 2}, {"x": 2, "y": 3}] + tmp_dir.scm_gen({"metric.json": json.dumps(metric_1)}, commit="init") + scm.tag("v1") + + metric_2 = [{"x": 1, "y": 3}, {"x": 2, "y": 5}] + tmp_dir.scm_gen( + {"metric.json": json.dumps(metric_2)}, commit="2nd ver metric" + ) + scm.tag("v2") + + metric_3 = [{"x": 1, "y": 5}, {"x": 2, "y": 6}] + tmp_dir.scm_gen({"metric.json": json.dumps(metric_3)}, commit="new metric") + + dvc.plot( + ["metric.json"], + revisions=["HEAD", "v2", "v1"], + plot_path="result.html", + ) + page = tmp_dir / "result.html" + + assert page.exists() + + all_data = to_data({"HEAD": metric_3, "v2": metric_2, "v1": metric_1}) + expected_script_content = json.dumps( + Template(dvc.dvc_dir).fill(all_data, "metric.json"), + indent=4, + separators=(",", ": "), + ) + + page_content = BeautifulSoup(page.read_text()) + assert expected_script_content in first(page_content.body.script.contents) From 12169aa9932f0a87a84bee396445bf13de127264 Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 2 Apr 2020 03:06:27 +0200 Subject: [PATCH 017/102] add some missing metric file tests --- dvc/repo/plot.py | 43 +++++++++++++++++++++--------- tests/func/test_plot.py | 59 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 88 insertions(+), 14 deletions(-) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 62f1c24144..ac53cf886a 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -43,11 +43,24 @@ def _prepare_div(vega_dict): ) -def _load_data(tree, target, revision="current workspace"): - with tree.open(target, "r") as fobj: - data = json.load(fobj) - for d in data: - d["revision"] = revision +def _load_data(repo, target, revision=None): + if revision is None: + revision = "current workspace" + tree = repo.tree + else: + tree = repo.scm.get_tree(revision) + + try: + with tree.open(target, "r") as fobj: + data = json.load(fobj) + for d in data: + d["revision"] = revision + except FileNotFoundError: + logger.warning( + "File '{}' was not found at: '{}'. It will not be " + "plotted.".format(target, revision) + ) + data = [] return data @@ -55,17 +68,21 @@ def _load_from_rev(repo, revisions, target): data = [] if len(revisions) == 0: if repo.scm.is_dirty(): - data.extend(_load_data(repo.scm.get_tree("HEAD"), target, "HEAD")) - data.extend(_load_data(repo.tree, target)) - logger.error(data) + data.extend(_load_data(repo, target, "HEAD")) + data.extend(_load_data(repo, target)) elif len(revisions) == 1: - data.extend( - _load_data(repo.scm.get_tree(revisions[0]), target, revisions[0]) - ) - data.extend(_load_data(repo.tree, target)) + data.extend(_load_data(repo, target, revisions[0])) + data.extend(_load_data(repo, target)) else: for rev in revisions: - data.extend(_load_data(repo.scm.get_tree(rev), target, rev)) + data.extend(_load_data(repo, target, rev)) + + if not data: + raise DvcException( + "Target metric: '{}' could not be found at any of '{}'".format( + target, ", ".join(revisions) + ) + ) return data diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 11a9dd8a1a..3d042a8c33 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -1,9 +1,13 @@ import json from copy import copy +import pytest from bs4 import BeautifulSoup from funcy import first +from dvc.exceptions import DvcException +from dvc.plot import Template + def _run_with_metric(tmp_dir, dvc, metric, metric_filename, commit=None): tmp_dir.gen({metric_filename: json.dumps(metric)}) @@ -19,6 +23,8 @@ def _add_revision(data, rev="current workspace"): for e in new_data: e["revision"] = rev + return new_data + def to_data(rev_data): result = [] @@ -125,8 +131,8 @@ def test_plot_multiple_revisions(tmp_dir, scm, dvc): revisions=["HEAD", "v2", "v1"], plot_path="result.html", ) - page = tmp_dir / "result.html" + page = tmp_dir / "result.html" assert page.exists() all_data = to_data({"HEAD": metric_3, "v2": metric_2, "v1": metric_1}) @@ -138,3 +144,54 @@ def test_plot_multiple_revisions(tmp_dir, scm, dvc): page_content = BeautifulSoup(page.read_text()) assert expected_script_content in first(page_content.body.script.contents) + + +def test_plot_even_if_metric_missing(tmp_dir, scm, dvc, caplog): + tmp_dir.scm_gen("some_file", "content", commit="there is no metric") + scm.tag("v1") + + metric = [{"x": 1, "y": 2}, {"x": 2, "y": 3}] + tmp_dir.scm_gen( + {"metric.json": json.dumps(metric)}, commit="there is metric" + ) + scm.tag("v2") + + caplog.clear() + with caplog.at_level(logging.WARNING, "dvc"): + dvc.plot( + ["metric.json"], revisions=["v1", "v2"], plot_path="result.html" + ) + assert ( + first(caplog.messages) + == "File 'metric.json' was not found at: 'v1'. It will not be plotted." + ) + + page = tmp_dir / "result.html" + assert page.exists() + + data = to_data({"v2": metric}) + expected_script_content = json.dumps( + Template(dvc.dvc_dir).fill(data, "metric.json"), + indent=4, + separators=(",", ": "), + ) + + page_content = BeautifulSoup(page.read_text()) + assert expected_script_content in first(page_content.body.script.contents) + + +def test_throw_on_no_metric_at_all(tmp_dir, scm, dvc): + tmp_dir.scm_gen("some_file", "content", commit="there is no metric") + scm.tag("v1") + + tmp_dir.scm_gen( + "some_other_file", + "other content", + commit="there is no " "metric as well", + ) + scm.tag("v2") + + with pytest.raises(DvcException): + dvc.plot( + ["metric.json"], revisions=["v2", "v1"], plot_path="result.html", + ) From 4726fe8224df082e2d41e9464963fc052f78552a Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 3 Apr 2020 16:14:40 +0200 Subject: [PATCH 018/102] plot from dvct --- dvc/command/plot.py | 3 +- dvc/repo/plot.py | 41 +++++++------- tests/func/test_plot.py | 120 ++++++++++++++++++++++------------------ 3 files changed, 91 insertions(+), 73 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 56d7d78688..43a52a7502 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -18,7 +18,8 @@ def run(self): "browser!".format( format_link( "file://{}".format( - os.path.join(self.repo.root_dir, path)) + os.path.join(self.repo.root_dir, path) + ) ) ) ) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index ac53cf886a..7a4aa7c881 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -43,7 +43,7 @@ def _prepare_div(vega_dict): ) -def _load_data(repo, target, revision=None): +def _load_data(repo, datafile, revision=None): if revision is None: revision = "current workspace" tree = repo.tree @@ -51,36 +51,36 @@ def _load_data(repo, target, revision=None): tree = repo.scm.get_tree(revision) try: - with tree.open(target, "r") as fobj: + with tree.open(datafile, "r") as fobj: data = json.load(fobj) for d in data: d["revision"] = revision except FileNotFoundError: logger.warning( "File '{}' was not found at: '{}'. It will not be " - "plotted.".format(target, revision) + "plotted.".format(datafile, revision) ) data = [] return data -def _load_from_rev(repo, revisions, target): +def _load_from_rev(repo, datafile, revisions): data = [] if len(revisions) == 0: if repo.scm.is_dirty(): - data.extend(_load_data(repo, target, "HEAD")) - data.extend(_load_data(repo, target)) + data.extend(_load_data(repo, datafile, "HEAD")) + data.extend(_load_data(repo, datafile)) elif len(revisions) == 1: - data.extend(_load_data(repo, target, revisions[0])) - data.extend(_load_data(repo, target)) + data.extend(_load_data(repo, datafile, revisions[0])) + data.extend(_load_data(repo, datafile)) else: for rev in revisions: - data.extend(_load_data(repo, target, rev)) + data.extend(_load_data(repo, datafile, rev)) if not data: raise DvcException( "Target metric: '{}' could not be found at any of '{}'".format( - target, ", ".join(revisions) + datafile, ", ".join(revisions) ) ) return data @@ -107,25 +107,28 @@ def _parse_plot_str(plot_str): raise DvcException("Error parsing") -def to_div(repo, plot_str): - datafile, templatefile = _parse_plot_str(plot_str) +def to_div(repo, plot_str, revisions=None): + datafile, vega_template_file = _parse_plot_str(plot_str) - data = _load_data(repo.tree, datafile) + data = _load_from_rev(repo, datafile, revisions) vega_plot_json = Template(repo.plot_templates.templates_dir).fill( - templatefile, data, datafile + vega_template_file, data, datafile ) return _prepare_div(vega_plot_json) @locked -def plot(repo, template_file, revisions=None): - if revisions is None: +def plot(repo, dvc_template_file, revisions=None): + if not revisions: revisions = [] - is_html, plot_strings = _parse_plots(template_file) - m = {plot_str: to_div(repo, plot_str) for plot_str in plot_strings} + is_html, plot_strings = _parse_plots(dvc_template_file) + m = { + plot_str: to_div(repo, plot_str, revisions) + for plot_str in plot_strings + } - result = template_file.replace(".dvct", ".html") + result = dvc_template_file.replace(".dvct", ".html") if not is_html: _save_plot_html( [m[p] for p in plot_strings], result, diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 3d042a8c33..006ae17420 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -1,4 +1,5 @@ import json +import logging from copy import copy import pytest @@ -6,16 +7,17 @@ from funcy import first from dvc.exceptions import DvcException -from dvc.plot import Template -def _run_with_metric(tmp_dir, dvc, metric, metric_filename, commit=None): +def _run_with_metric(tmp_dir, metric, metric_filename, commit=None, tag=None): tmp_dir.gen({metric_filename: json.dumps(metric)}) - dvc.run(metrics_no_cache=[metric_filename]) - if hasattr(dvc, "scm"): - dvc.scm.add([metric_filename, metric_filename + ".dvc"]) + tmp_dir.dvc.run(metrics_no_cache=[metric_filename]) + if hasattr(tmp_dir.dvc, "scm"): + tmp_dir.dvc.scm.add([metric_filename, metric_filename + ".dvc"]) if commit: - dvc.scm.commit(commit) + tmp_dir.dvc.scm.commit(commit) + if tag: + tmp_dir.dvc.scm.tag(tag) def _add_revision(data, rev="current workspace"): @@ -32,6 +34,7 @@ def to_data(rev_data): result.extend(_add_revision(data, key)) return result + # TODO def test_plot_in_html_file(tmp_dir): pass @@ -39,7 +42,7 @@ def test_plot_in_html_file(tmp_dir): def test_plot_in_no_html(tmp_dir, scm, dvc): metric = [{"x": 1, "y": 2}, {"x": 2, "y": 3}] - _run_with_metric(tmp_dir, dvc, metric, "metric.json", "first run") + _run_with_metric(tmp_dir, metric, "metric.json", "first run") template_content = "" (tmp_dir / "template.dvct").write_text(template_content) @@ -71,9 +74,7 @@ def test_plot_in_no_html(tmp_dir, scm, dvc): def test_plot_confusion(tmp_dir, dvc): confusion_matrix = [{"x": "B", "y": "A"}, {"x": "A", "y": "A"}] - _run_with_metric( - tmp_dir, dvc, confusion_matrix, "metric.json", "first run" - ) + _run_with_metric(tmp_dir, confusion_matrix, "metric.json", "first run") template_content = "" (tmp_dir / "template.dvct").write_text(template_content) @@ -114,36 +115,42 @@ def test_plot_confusion(tmp_dir, dvc): def test_plot_multiple_revisions(tmp_dir, scm, dvc): metric_1 = [{"x": 1, "y": 2}, {"x": 2, "y": 3}] - tmp_dir.scm_gen({"metric.json": json.dumps(metric_1)}, commit="init") - scm.tag("v1") + _run_with_metric(tmp_dir, metric_1, "metric.json", "init", "v1") metric_2 = [{"x": 1, "y": 3}, {"x": 2, "y": 5}] - tmp_dir.scm_gen( - {"metric.json": json.dumps(metric_2)}, commit="2nd ver metric" - ) - scm.tag("v2") + _run_with_metric(tmp_dir, metric_2, "metric.json", "second", "v2") metric_3 = [{"x": 1, "y": 5}, {"x": 2, "y": 6}] - tmp_dir.scm_gen({"metric.json": json.dumps(metric_3)}, commit="new metric") + _run_with_metric(tmp_dir, metric_3, "metric.json", "third") - dvc.plot( - ["metric.json"], - revisions=["HEAD", "v2", "v1"], - plot_path="result.html", - ) + (tmp_dir / "template.dvct").write_text("") + dvc.plot("template.dvct", revisions=["HEAD", "v2", "v1"]) - page = tmp_dir / "result.html" - assert page.exists() - - all_data = to_data({"HEAD": metric_3, "v2": metric_2, "v1": metric_1}) - expected_script_content = json.dumps( - Template(dvc.dvc_dir).fill(all_data, "metric.json"), + content = BeautifulSoup((tmp_dir / "template.html").read_text()) + assert json.dumps( + { + "$schema": "https://vega.github.io/schema/vega-lite/v4.json", + "data": { + "values": [ + {"x": 1, "y": 5, "revision": "HEAD"}, + {"x": 2, "y": 6, "revision": "HEAD"}, + {"x": 1, "y": 3, "revision": "v2"}, + {"x": 2, "y": 5, "revision": "v2"}, + {"x": 1, "y": 2, "revision": "v1"}, + {"x": 2, "y": 3, "revision": "v1"}, + ] + }, + "mark": {"type": "line"}, + "encoding": { + "x": {"field": "x", "type": "quantitative"}, + "y": {"field": "y", "type": "quantitative"}, + "color": {"field": "revision", "type": "nominal"}, + }, + "title": "metric.json", + }, indent=4, separators=(",", ": "), - ) - - page_content = BeautifulSoup(page.read_text()) - assert expected_script_content in first(page_content.body.script.contents) + ) in first(content.body.script.contents) def test_plot_even_if_metric_missing(tmp_dir, scm, dvc, caplog): @@ -151,33 +158,39 @@ def test_plot_even_if_metric_missing(tmp_dir, scm, dvc, caplog): scm.tag("v1") metric = [{"x": 1, "y": 2}, {"x": 2, "y": 3}] - tmp_dir.scm_gen( - {"metric.json": json.dumps(metric)}, commit="there is metric" - ) - scm.tag("v2") + _run_with_metric(tmp_dir, metric, "metric.json", "there is metric", "v2") + + (tmp_dir / "template.dvct").write_text("") caplog.clear() with caplog.at_level(logging.WARNING, "dvc"): - dvc.plot( - ["metric.json"], revisions=["v1", "v2"], plot_path="result.html" - ) + result = dvc.plot("template.dvct", revisions=["v1", "v2"]) assert ( first(caplog.messages) == "File 'metric.json' was not found at: 'v1'. It will not be plotted." ) - page = tmp_dir / "result.html" - assert page.exists() - - data = to_data({"v2": metric}) - expected_script_content = json.dumps( - Template(dvc.dvc_dir).fill(data, "metric.json"), + page_content = BeautifulSoup((tmp_dir / result).read_text()) + assert json.dumps( + { + "$schema": "https://vega.github.io/schema/vega-lite/v4.json", + "data": { + "values": [ + {"x": 1, "y": 2, "revision": "v2"}, + {"x": 2, "y": 3, "revision": "v2"}, + ] + }, + "mark": {"type": "line"}, + "encoding": { + "x": {"field": "x", "type": "quantitative"}, + "y": {"field": "y", "type": "quantitative"}, + "color": {"field": "revision", "type": "nominal"}, + }, + "title": "metric.json", + }, indent=4, separators=(",", ": "), - ) - - page_content = BeautifulSoup(page.read_text()) - assert expected_script_content in first(page_content.body.script.contents) + ) in first(page_content.body.script.contents) def test_throw_on_no_metric_at_all(tmp_dir, scm, dvc): @@ -187,11 +200,12 @@ def test_throw_on_no_metric_at_all(tmp_dir, scm, dvc): tmp_dir.scm_gen( "some_other_file", "other content", - commit="there is no " "metric as well", + commit="there is no metric as well", ) scm.tag("v2") + (tmp_dir / "template.dvct").write_text("") + + # TODO create exception with pytest.raises(DvcException): - dvc.plot( - ["metric.json"], revisions=["v2", "v1"], plot_path="result.html", - ) + dvc.plot("template.dvct", revisions=["v2", "v1"]) From 6cb26aa737bc8100f7f2ac555ca1df3c4e4c2580 Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 3 Apr 2020 17:14:36 +0200 Subject: [PATCH 019/102] brush up commands --- dvc/command/plot.py | 48 ++++++++++++++--------------------------- dvc/repo/__init__.py | 1 + dvc/repo/plot.py | 13 ++++++++++- tests/func/test_plot.py | 28 ++++++------------------ 4 files changed, 36 insertions(+), 54 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 43a52a7502..e33034fe82 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -1,6 +1,5 @@ import argparse import logging -import os from dvc.command.base import append_doc_link, CmdBase, fix_subparsers from dvc.exceptions import DvcException @@ -12,17 +11,9 @@ class CmdPlotShow(CmdBase): def run(self): try: - path = self.repo.plot(self.args.targets) - logger.info( - "Your can see your plot by opening {} in your " - "browser!".format( - format_link( - "file://{}".format( - os.path.join(self.repo.root_dir, path) - ) - ) - ) - ) + # TODO overriding datafile functionality + self.repo.plot(self.args.templatefile) + except DvcException: logger.exception("failed to plot metrics") return 0 @@ -31,10 +22,7 @@ def run(self): class CmdPlotDiff(CmdBase): def run(self): try: - logger.error("Plotting diff") - self.repo.plot( - self.args.targets, revisions=[self.args.a_rev, self.args.b_rev] - ) + self.repo.plot(self.args.template, revisions=self.args.revisions) except DvcException: logger.exception("failed to plot metrics diff") @@ -60,7 +48,7 @@ def add_parser(subparsers, parent_parser): fix_subparsers(plot_subparsers) - SHOW_HELP = "Visualize target metric file using {}.".format( + SHOW_HELP = "Visualize target dvct file using {}.".format( format_link("https://vega.github.io") ) plot_show_parser = plot_subparsers.add_parser( @@ -71,10 +59,13 @@ def add_parser(subparsers, parent_parser): formatter_class=argparse.RawDescriptionHelpFormatter, ) plot_show_parser.add_argument( - "--template", nargs="?", help="Template file to choose." + "templatefile", nargs="?", help="dvct file to visualize." ) - plot_parser.add_argument( - "target", nargs="?", help="Metric files to visualize." + plot_show_parser.add_argument( + "datafile", + nargs="?", + default=None, + help="Vega template file " "used to visualize " "data from datafile", ) plot_show_parser.set_defaults(func=CmdPlotShow) @@ -88,19 +79,12 @@ def add_parser(subparsers, parent_parser): formatter_class=argparse.RawDescriptionHelpFormatter, ) plot_diff_parser.add_argument( - "a_rev", nargs="?", help="Old Git commit to plot" - ) - plot_diff_parser.add_argument( - "b_rev", - nargs="?", - help=("New Git commit to plot(defaults to the current workspace)"), + "template", nargs="?", help=("dvct template file to process."), ) plot_diff_parser.add_argument( - "--targets", - nargs="*", - help=( - "Metric files or directories to plot for. " - "Plots for all metric files by default." - ), + "revisions", + nargs="+", + default=[], + help=("Git revisions to plot from"), ) plot_diff_parser.set_defaults(func=CmdPlotDiff) diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index 0165c794ff..ecb6211966 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -114,6 +114,7 @@ def __init__(self, root_dir=None): self._ignore() + # TODO is it necessary? self.plot_templates = PlotTemplates(self.dvc_dir) @property diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 7a4aa7c881..c26dc58ac8 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -1,5 +1,6 @@ import json import logging +import os import random import re import string @@ -7,6 +8,7 @@ from dvc.exceptions import DvcException from dvc.plot import Template from dvc.repo import locked +from dvc.utils import format_link logger = logging.getLogger(__name__) @@ -133,6 +135,15 @@ def plot(repo, dvc_template_file, revisions=None): _save_plot_html( [m[p] for p in plot_strings], result, ) - return result else: raise NotImplementedError + + logger.info( + "Your can see your plot by opening {} in your " + "browser!".format( + format_link( + "file://{}".format(os.path.join(repo.root_dir, result)) + ) + ) + ) + return result diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 006ae17420..f773627d84 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -1,6 +1,5 @@ import json import logging -from copy import copy import pytest from bs4 import BeautifulSoup @@ -20,26 +19,6 @@ def _run_with_metric(tmp_dir, metric, metric_filename, commit=None, tag=None): tmp_dir.dvc.scm.tag(tag) -def _add_revision(data, rev="current workspace"): - new_data = copy(data) - for e in new_data: - e["revision"] = rev - - return new_data - - -def to_data(rev_data): - result = [] - for key, data in rev_data.items(): - result.extend(_add_revision(data, key)) - return result - - -# TODO -def test_plot_in_html_file(tmp_dir): - pass - - def test_plot_in_no_html(tmp_dir, scm, dvc): metric = [{"x": 1, "y": 2}, {"x": 2, "y": 3}] _run_with_metric(tmp_dir, metric, "metric.json", "first run") @@ -209,3 +188,10 @@ def test_throw_on_no_metric_at_all(tmp_dir, scm, dvc): # TODO create exception with pytest.raises(DvcException): dvc.plot("template.dvct", revisions=["v2", "v1"]) + + +# TODO +# def test_plot_in_html_file(tmp_dir): +# def test_plot_override_data_file +# def test_plot_custom_template_file +# def test_plot_multiple_plots From 8a145a1adbba7d3f7eda7d9c0908a42bb70ed198 Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 3 Apr 2020 19:07:40 +0200 Subject: [PATCH 020/102] fix confusion matrix multiple plot --- dvc/plot.py | 1 + tests/func/test_plot.py | 1 + 2 files changed, 2 insertions(+) diff --git a/dvc/plot.py b/dvc/plot.py index f6103d2492..02d729cd2f 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -95,6 +95,7 @@ class DefaultConfusionTemplate(Template): "title": "Actual value", }, "color": {"aggregate": "count", "type": "quantitative"}, + "facet": {"field": "revision", "type": "nominal"}, }, } diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index f773627d84..099aca10ba 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -84,6 +84,7 @@ def test_plot_confusion(tmp_dir, dvc): "title": "Actual value", }, "color": {"aggregate": "count", "type": "quantitative"}, + "facet": {"field": "revision", "type": "nominal"}, }, "title": "metric.json", }, From 9d76c9a1ac59698b067f703c6c9232fbbbd31e82 Mon Sep 17 00:00:00 2001 From: pawel Date: Mon, 6 Apr 2020 11:25:06 +0200 Subject: [PATCH 021/102] plot: change confusion matrix data schema --- dvc/plot.py | 11 ++--------- tests/func/test_plot.py | 23 ++++++++++++++++------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/dvc/plot.py b/dvc/plot.py index 02d729cd2f..c0ba7334f6 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -51,7 +51,6 @@ def load_template(self, path): def fill(self, template_path, data, data_src=""): assert isinstance(data, list) - assert all({"x", "y", "revision"} == set(d.keys()) for d in data) update_dict = {"data": {"values": data}, "title": data_src} @@ -83,17 +82,11 @@ class DefaultConfusionTemplate(Template): "mark": "rect", "encoding": { "x": { - "field": "x", + "field": "predicted", "type": "nominal", "sort": "ascending", - "title": "Predicted value", - }, - "y": { - "field": "y", - "type": "nominal", - "sort": "ascending", - "title": "Actual value", }, + "y": {"field": "actual", "type": "nominal", "sort": "ascending"}, "color": {"aggregate": "count", "type": "quantitative"}, "facet": {"field": "revision", "type": "nominal"}, }, diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 099aca10ba..f61a2aa8a3 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -52,7 +52,10 @@ def test_plot_in_no_html(tmp_dir, scm, dvc): def test_plot_confusion(tmp_dir, dvc): - confusion_matrix = [{"x": "B", "y": "A"}, {"x": "A", "y": "A"}] + confusion_matrix = [ + {"predicted": "B", "actual": "A"}, + {"predicted": "A", "actual": "A"}, + ] _run_with_metric(tmp_dir, confusion_matrix, "metric.json", "first run") template_content = "" (tmp_dir / "template.dvct").write_text(template_content) @@ -65,23 +68,29 @@ def test_plot_confusion(tmp_dir, dvc): "$schema": "https://vega.github.io/schema/vega-lite/v4.json", "data": { "values": [ - {"x": "B", "y": "A", "revision": "current workspace"}, - {"x": "A", "y": "A", "revision": "current workspace"}, + { + "predicted": "B", + "actual": "A", + "revision": "current workspace", + }, + { + "predicted": "A", + "actual": "A", + "revision": "current workspace", + }, ] }, "mark": "rect", "encoding": { "x": { - "field": "x", + "field": "predicted", "type": "nominal", "sort": "ascending", - "title": "Predicted value", }, "y": { - "field": "y", + "field": "actual", "type": "nominal", "sort": "ascending", - "title": "Actual value", }, "color": {"aggregate": "count", "type": "quantitative"}, "facet": {"field": "revision", "type": "nominal"}, From 26b56442cbc7dac143eaeaba642336a939e1fdb6 Mon Sep 17 00:00:00 2001 From: pawel Date: Wed, 8 Apr 2020 17:30:50 +0200 Subject: [PATCH 022/102] should be working as intended --- dvc/command/plot.py | 21 +++- dvc/plot.py | 109 +++++++++++++---- dvc/repo/plot.py | 180 ++++++++++++++------------- tests/func/test_plot.py | 265 +++++++++++++++++++++++++++------------- 4 files changed, 379 insertions(+), 196 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index e33034fe82..09aa4cf88e 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -12,7 +12,7 @@ class CmdPlotShow(CmdBase): def run(self): try: # TODO overriding datafile functionality - self.repo.plot(self.args.templatefile) + self.repo.plot(self.args.datafile, self.args.template) except DvcException: logger.exception("failed to plot metrics") @@ -22,7 +22,11 @@ def run(self): class CmdPlotDiff(CmdBase): def run(self): try: - self.repo.plot(self.args.template, revisions=self.args.revisions) + self.repo.plot( + self.args.datafile, + self.args.template, + revisions=self.args.revisions, + ) except DvcException: logger.exception("failed to plot metrics diff") @@ -59,7 +63,7 @@ def add_parser(subparsers, parent_parser): formatter_class=argparse.RawDescriptionHelpFormatter, ) plot_show_parser.add_argument( - "templatefile", nargs="?", help="dvct file to visualize." + "--template", nargs="?", default=None, help="dvct file to visualize." ) plot_show_parser.add_argument( "datafile", @@ -79,7 +83,16 @@ def add_parser(subparsers, parent_parser): formatter_class=argparse.RawDescriptionHelpFormatter, ) plot_diff_parser.add_argument( - "template", nargs="?", help=("dvct template file to process."), + "--template", + nargs="?", + default=None, + help=("dvct template file to " "process."), + ) + plot_diff_parser.add_argument( + "--datafile", + nargs="?", + default=None, + help="Vega template file " "used to visualize " "data from datafile", ) plot_diff_parser.add_argument( "revisions", diff --git a/dvc/plot.py b/dvc/plot.py index c0ba7334f6..4768cd587e 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -1,6 +1,9 @@ import json import logging import os +import random +import re +import string from funcy import cached_property @@ -10,31 +13,68 @@ logger = logging.getLogger(__name__) +PAGE_HTML = """ + + dvc plot + + + + + + {divs} + +""" + +DIV_HTML = """
+""" + + +def _save_plot_html(divs, path): + page = PAGE_HTML.format(divs="\n".join(divs)) + with open(path, "w") as fobj: + fobj.write(page) + + +def _prepare_div(vega_dict): + id = "".join(random.sample(string.ascii_lowercase, 8)) + return DIV_HTML.format( + id=str(id), + vega_json=json.dumps(vega_dict, indent=4, separators=(",", ": ")), + ) + class Template: INDENT = 4 SEPARATORS = (",", ": ") + EXTENTION = ".dvct" + METRIC_DATA_STRING = "" def __init__(self, templates_dir): self.plot_templates_dir = templates_dir def dump(self): - import json - makedirs(self.plot_templates_dir, exist_ok=True) if not os.path.exists(self.plot_templates_dir): makedirs(self.plot_templates_dir) - with open( - os.path.join(self.plot_templates_dir, self.TEMPLATE_NAME), "w+" - ) as fd: - json.dump( - self.DEFAULT_CONTENT, - fd, - indent=self.INDENT, - separators=self.SEPARATORS, - ) + div = _prepare_div(self.DEFAULT_CONTENT) + + _save_plot_html( + [div], + os.path.join( + self.plot_templates_dir, self.TEMPLATE_NAME + self.EXTENTION + ), + ) + # json.dump( + # self.DEFAULT_CONTENT, + # fd, + # indent=self.INDENT, + # separators=self.SEPARATORS, + # ) def load_template(self, path): try: @@ -49,22 +89,45 @@ def load_template(self, path): except FileNotFoundError: raise DvcException("Not in repo nor in defaults") - def fill(self, template_path, data, data_src=""): - assert isinstance(data, list) - - update_dict = {"data": {"values": data}, "title": data_src} - - vega_spec = self.load_template(template_path) - vega_spec.update(update_dict) - return vega_spec + @staticmethod + def parse_data_placeholders(template_path): + regex = re.compile("") + with open(template_path, "r") as fobj: + template_content = fobj.read() + matches = regex.findall(template_content) + data_files = [ + m.replace("<", "") + .replace(">", "") + .replace("DVC_METRIC_DATA::", "") + for m in matches + ] + return data_files + + @staticmethod + def fill(template_path, data): + with open(template_path, "r") as fobj: + template_str = fobj.read() + regex = re.compile('""') + matches = regex.findall(template_str) + + result_path = os.path.basename(template_path).replace(".dvct", ".html") + result_content = template_str.replace( + matches[0], + json.dumps( + data, indent=Template.INDENT, separators=Template.SEPARATORS + ), + ) + with open(result_path, "w") as fobj: + fobj.write(result_content) + return result_path class DefaultLinearTemplate(Template): - TEMPLATE_NAME = "default.json" + TEMPLATE_NAME = "default" DEFAULT_CONTENT = { "$schema": "https://vega.github.io/schema/vega-lite/v4.json", - "data": {"values": []}, + "data": {"values": Template.METRIC_DATA_STRING}, "mark": {"type": "line"}, "encoding": { "x": {"field": "x", "type": "quantitative"}, @@ -75,10 +138,10 @@ class DefaultLinearTemplate(Template): class DefaultConfusionTemplate(Template): - TEMPLATE_NAME = "cf.json" + TEMPLATE_NAME = "cf" DEFAULT_CONTENT = { "$schema": "https://vega.github.io/schema/vega-lite/v4.json", - "data": {"values": []}, + "data": {"values": Template.METRIC_DATA_STRING}, "mark": "rect", "encoding": { "x": { diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index c26dc58ac8..82eea2917a 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -1,9 +1,10 @@ +import csv import json import logging import os -import random import re -import string + +from funcy import first from dvc.exceptions import DvcException from dvc.plot import Template @@ -12,40 +13,41 @@ logger = logging.getLogger(__name__) -PAGE_HTML = """ - - dvc plot - - - - - - {divs} - -""" - -DIV_HTML = """
-""" - - -def _save_plot_html(divs, path): - page = PAGE_HTML.format(divs="\n".join(divs)) - with open(path, "w") as fobj: - fobj.write(page) - - -def _prepare_div(vega_dict): - id = "".join(random.sample(string.ascii_lowercase, 8)) - return DIV_HTML.format( - id=str(id), - vega_json=json.dumps(vega_dict, indent=4, separators=(",", ": ")), - ) +def _all_dict_of_length_one(data): + return all([isinstance(e, dict) and len(e) == 1 for e in data]) + + +def _load_from_tree(tree, datafile, default_plot=False): + if datafile.endswith(".json"): + with tree.open(datafile, "r") as fobj: + data = json.load(fobj) + assert isinstance(data, list) + + if default_plot: + assert all(len(e) >= 1 for e in data) + last_key = list(first(data).keys())[-1] + data = [{"y": d[last_key], "x": i} for i, d in enumerate(data)] + elif datafile.endswith(".csv"): + with tree.open(datafile, "r") as fobj: + if default_plot: + data = [] + for index, row in enumerate(csv.reader(fobj)): + assert len(row) >= 1 + if index == 0 and len(row) > 1: + # skip header + continue + data.append({"y": row[-1], "x": index}) + else: + data = [ + row + for row in (csv.DictReader(fobj, skipinitialspace=True)) + ] -def _load_data(repo, datafile, revision=None): + return data + + +def _load_from_revision(repo, datafile, revision=None, default_plot=False): if revision is None: revision = "current workspace" tree = repo.tree @@ -53,10 +55,9 @@ def _load_data(repo, datafile, revision=None): tree = repo.scm.get_tree(revision) try: - with tree.open(datafile, "r") as fobj: - data = json.load(fobj) - for d in data: - d["revision"] = revision + data = _load_from_tree(tree, datafile, default_plot) + for d in data: + d["revision"] = revision except FileNotFoundError: logger.warning( "File '{}' was not found at: '{}'. It will not be " @@ -66,18 +67,27 @@ def _load_data(repo, datafile, revision=None): return data -def _load_from_rev(repo, datafile, revisions): +def _load_from_revisions(repo, datafile, revisions, default_plot=False): + # TODO those _load_from_revision calls data = [] if len(revisions) == 0: if repo.scm.is_dirty(): - data.extend(_load_data(repo, datafile, "HEAD")) - data.extend(_load_data(repo, datafile)) + data.extend( + _load_from_revision(repo, datafile, "HEAD", default_plot) + ) + data.extend( + _load_from_revision(repo, datafile, default_plot=default_plot) + ) elif len(revisions) == 1: - data.extend(_load_data(repo, datafile, revisions[0])) - data.extend(_load_data(repo, datafile)) + data.extend( + _load_from_revision(repo, datafile, revisions[0], default_plot) + ) + data.extend( + _load_from_revision(repo, datafile, default_plot=default_plot) + ) else: for rev in revisions: - data.extend(_load_data(repo, datafile, rev)) + data.extend(_load_from_revision(repo, datafile, rev, default_plot)) if not data: raise DvcException( @@ -88,62 +98,58 @@ def _load_from_rev(repo, datafile, revisions): return data -def _parse_plots(path): - with open(path, "r") as fobj: - content = fobj.read() - - plot_regex = re.compile("") - - plots = list(plot_regex.findall(content)) - return False, plots - - -def _parse_plot_str(plot_str): - content = plot_str.replace("<", "") - content = content.replace(">", "") - args = content.split("::")[1:] - if len(args) == 2: - return args - elif len(args) == 1: - return args[0], "default.json" - raise DvcException("Error parsing") - +def _evaluate_templatepath(repo, template): + if os.path.exists(template): + return template + else: + # TODO + logger.debug("Template '{}' not found, checking in plot dir.") + plots_dir_path = os.path.join( + repo.plot_templates.templates_dir, template + ) + if os.path.exists(plots_dir_path): + return plots_dir_path + else: + regex = re.compile(template + ".*") + for t in os.listdir(repo.plot_templates.templates_dir): + if regex.match(t): + return os.path.join(repo.plot_templates.templates_dir, t) + raise DvcException("No template found") -def to_div(repo, plot_str, revisions=None): - datafile, vega_template_file = _parse_plot_str(plot_str) - data = _load_from_rev(repo, datafile, revisions) - vega_plot_json = Template(repo.plot_templates.templates_dir).fill( - vega_template_file, data, datafile - ) - return _prepare_div(vega_plot_json) +def _parse_template(path): + pass @locked -def plot(repo, dvc_template_file, revisions=None): - if not revisions: - revisions = [] +def plot(repo, datafile=None, template=None, revisions=None): + default_plot = False - is_html, plot_strings = _parse_plots(dvc_template_file) - m = { - plot_str: to_div(repo, plot_str, revisions) - for plot_str in plot_strings - } - - result = dvc_template_file.replace(".dvct", ".html") - if not is_html: - _save_plot_html( - [m[p] for p in plot_strings], result, + if template is None: + template_path = os.path.join( + repo.plot_templates.templates_dir, "default.dvct" ) + default_plot = True else: - raise NotImplementedError + template_path = _evaluate_templatepath(repo, template) + # TODO exception + assert template_path.endswith(".dvct") + + if revisions is None: + revisions = [] + # load datafiles from template + # TODO templatepath from templatefile + # datafiles = _parse_template(template_path) + data = _load_from_revisions(repo, datafile, revisions, default_plot) + result_path = Template.fill(template_path, data) logger.info( "Your can see your plot by opening {} in your " "browser!".format( format_link( - "file://{}".format(os.path.join(repo.root_dir, result)) + "file://{}".format(os.path.join(repo.root_dir, result_path)) ) ) ) - return result + return result_path + # replace DVC_PLOT_DATA in template w diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index f61a2aa8a3..9584c4179b 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -1,15 +1,18 @@ +import csv import json import logging +import shutil import pytest from bs4 import BeautifulSoup from funcy import first +from dvc.compat import fspath from dvc.exceptions import DvcException -def _run_with_metric(tmp_dir, metric, metric_filename, commit=None, tag=None): - tmp_dir.gen({metric_filename: json.dumps(metric)}) +def _run_with_metric(tmp_dir, metric_filename, commit=None, tag=None): + # tmp_dir.gen({metric_filename: json.dumps(metric)}) tmp_dir.dvc.run(metrics_no_cache=[metric_filename]) if hasattr(tmp_dir.dvc, "scm"): tmp_dir.dvc.scm.add([metric_filename, metric_filename + ".dvc"]) @@ -19,38 +22,102 @@ def _run_with_metric(tmp_dir, metric, metric_filename, commit=None, tag=None): tmp_dir.dvc.scm.tag(tag) -def test_plot_in_no_html(tmp_dir, scm, dvc): - metric = [{"x": 1, "y": 2}, {"x": 2, "y": 3}] - _run_with_metric(tmp_dir, metric, "metric.json", "first run") +def _write_csv(metric, filename): + with open(filename, "w") as csvobj: + if all([len(e) > 1 for e in metric]): + writer = csv.DictWriter( + csvobj, fieldnames=list(first(metric).keys()) + ) + writer.writeheader() + writer.writerows(metric) + else: + writer = csv.writer(csvobj) + for d in metric: + assert len(d) == 1 + writer.writerow(list(d.values())) - template_content = "" - (tmp_dir / "template.dvct").write_text(template_content) - result = dvc.plot("template.dvct") +def _write_json(tmp_dir, metric, filename): + tmp_dir.gen(filename, json.dumps(metric)) + +def test_plot_csv_one_column(tmp_dir, scm, dvc): + metric = [{"val": 2}, {"val": 3}] + _write_csv(metric, "metric.csv") + _run_with_metric(tmp_dir, metric_filename="metric.csv") + + result = dvc.plot("metric.csv") page_content = BeautifulSoup((tmp_dir / result).read_text()) - assert json.dumps( - { - "$schema": "https://vega.github.io/schema/vega-lite/v4.json", - "data": { - "values": [ - {"x": 1, "y": 2, "revision": "current workspace"}, - {"x": 2, "y": 3, "revision": "current workspace"}, - ] - }, - "mark": {"type": "line"}, - "encoding": { - "x": {"field": "x", "type": "quantitative"}, - "y": {"field": "y", "type": "quantitative"}, - "color": {"field": "revision", "type": "nominal"}, - }, - "title": "metric.json", - }, + vega_data = json.dumps( + [ + # TODO csv reads as strings, what to do with that? + {"y": "2", "x": 0, "revision": "current workspace"}, + {"y": "3", "x": 1, "revision": "current workspace"}, + ], indent=4, separators=(",", ": "), - ) in first(page_content.body.script.contents) + ) + assert vega_data in first(page_content.body.script.contents) + + +def test_plot_csv_multiple_columns(tmp_dir, scm, dvc): + metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] + _write_csv(metric, "metric.csv") + _run_with_metric(tmp_dir, metric_filename="metric.csv") + + result = dvc.plot("metric.csv") + page_content = BeautifulSoup((tmp_dir / result).read_text()) + vega_data = json.dumps( + [ + # header was skipped so index starts at 1 + {"y": "2", "x": 1, "revision": "current workspace"}, + {"y": "3", "x": 2, "revision": "current workspace"}, + ], + indent=4, + separators=(",", ": "), + ) + assert vega_data in first(page_content.body.script.contents) + + +def test_plot_json_single_val(tmp_dir, scm, dvc): + metric = [{"val": 2}, {"val": 3}] + _write_json(tmp_dir, metric, "metric.json") + _run_with_metric(tmp_dir, "metric.json", "first run") + + result = dvc.plot("metric.json") + + page_content = BeautifulSoup((tmp_dir / result).read_text()) + vega_data = json.dumps( + [ + {"y": 2, "x": 0, "revision": "current workspace"}, + {"y": 3, "x": 1, "revision": "current workspace"}, + ], + indent=4, + separators=(",", ": "), + ) + assert vega_data in first(page_content.body.script.contents) +def test_plot_json_multiple_val(tmp_dir, scm, dvc): + metric = [{"first_val": 100, "val": 2}, {"first_val": 100, "val": 3}] + _write_json(tmp_dir, metric, "metric.json") + _run_with_metric(tmp_dir, "metric.json", "first run") + + result = dvc.plot("metric.json") + + page_content = BeautifulSoup((tmp_dir / result).read_text()) + vega_data = json.dumps( + [ + {"y": 2, "x": 0, "revision": "current workspace"}, + {"y": 3, "x": 1, "revision": "current workspace"}, + ], + indent=4, + separators=(",", ": "), + ) + assert vega_data in first(page_content.body.script.contents) + + +@pytest.mark.skip def test_plot_confusion(tmp_dir, dvc): confusion_matrix = [ {"predicted": "B", "actual": "A"}, @@ -60,7 +127,7 @@ def test_plot_confusion(tmp_dir, dvc): template_content = "" (tmp_dir / "template.dvct").write_text(template_content) - result = dvc.plot("template.dvct") + result = dvc.plot_template("template.dvct") page_content = BeautifulSoup((tmp_dir / result).read_text()) assert json.dumps( @@ -104,42 +171,33 @@ def test_plot_confusion(tmp_dir, dvc): def test_plot_multiple_revisions(tmp_dir, scm, dvc): metric_1 = [{"x": 1, "y": 2}, {"x": 2, "y": 3}] - _run_with_metric(tmp_dir, metric_1, "metric.json", "init", "v1") + _write_json(tmp_dir, metric_1, "metric.json") + _run_with_metric(tmp_dir, "metric.json", "init", "v1") metric_2 = [{"x": 1, "y": 3}, {"x": 2, "y": 5}] - _run_with_metric(tmp_dir, metric_2, "metric.json", "second", "v2") + _write_json(tmp_dir, metric_2, "metric.json") + _run_with_metric(tmp_dir, "metric.json", "second", "v2") metric_3 = [{"x": 1, "y": 5}, {"x": 2, "y": 6}] - _run_with_metric(tmp_dir, metric_3, "metric.json", "third") - - (tmp_dir / "template.dvct").write_text("") - dvc.plot("template.dvct", revisions=["HEAD", "v2", "v1"]) - - content = BeautifulSoup((tmp_dir / "template.html").read_text()) - assert json.dumps( - { - "$schema": "https://vega.github.io/schema/vega-lite/v4.json", - "data": { - "values": [ - {"x": 1, "y": 5, "revision": "HEAD"}, - {"x": 2, "y": 6, "revision": "HEAD"}, - {"x": 1, "y": 3, "revision": "v2"}, - {"x": 2, "y": 5, "revision": "v2"}, - {"x": 1, "y": 2, "revision": "v1"}, - {"x": 2, "y": 3, "revision": "v1"}, - ] - }, - "mark": {"type": "line"}, - "encoding": { - "x": {"field": "x", "type": "quantitative"}, - "y": {"field": "y", "type": "quantitative"}, - "color": {"field": "revision", "type": "nominal"}, - }, - "title": "metric.json", - }, + _write_json(tmp_dir, metric_3, "metric.json") + _run_with_metric(tmp_dir, "metric.json", "third") + + dvc.plot("metric.json", revisions=["HEAD", "v2", "v1"]) + + content = BeautifulSoup((tmp_dir / "default.html").read_text()) + vega_data = json.dumps( + [ + {"y": 5, "x": 0, "revision": "HEAD"}, + {"y": 6, "x": 1, "revision": "HEAD"}, + {"y": 3, "x": 0, "revision": "v2"}, + {"y": 5, "x": 1, "revision": "v2"}, + {"y": 2, "x": 0, "revision": "v1"}, + {"y": 3, "x": 1, "revision": "v1"}, + ], indent=4, separators=(",", ": "), - ) in first(content.body.script.contents) + ) + assert vega_data in first(content.body.script.contents) def test_plot_even_if_metric_missing(tmp_dir, scm, dvc, caplog): @@ -147,39 +205,27 @@ def test_plot_even_if_metric_missing(tmp_dir, scm, dvc, caplog): scm.tag("v1") metric = [{"x": 1, "y": 2}, {"x": 2, "y": 3}] - _run_with_metric(tmp_dir, metric, "metric.json", "there is metric", "v2") - - (tmp_dir / "template.dvct").write_text("") + _write_json(tmp_dir, metric, "metric.json") + _run_with_metric(tmp_dir, "metric.json", "there is metric", "v2") caplog.clear() with caplog.at_level(logging.WARNING, "dvc"): - result = dvc.plot("template.dvct", revisions=["v1", "v2"]) + result = dvc.plot("metric.json", revisions=["v1", "v2"]) assert ( first(caplog.messages) == "File 'metric.json' was not found at: 'v1'. It will not be plotted." ) page_content = BeautifulSoup((tmp_dir / result).read_text()) - assert json.dumps( - { - "$schema": "https://vega.github.io/schema/vega-lite/v4.json", - "data": { - "values": [ - {"x": 1, "y": 2, "revision": "v2"}, - {"x": 2, "y": 3, "revision": "v2"}, - ] - }, - "mark": {"type": "line"}, - "encoding": { - "x": {"field": "x", "type": "quantitative"}, - "y": {"field": "y", "type": "quantitative"}, - "color": {"field": "revision", "type": "nominal"}, - }, - "title": "metric.json", - }, + vega_data = json.dumps( + [ + {"y": 2, "x": 0, "revision": "v2"}, + {"y": 3, "x": 1, "revision": "v2"}, + ], indent=4, separators=(",", ": "), - ) in first(page_content.body.script.contents) + ) + assert vega_data in first(page_content.body.script.contents) def test_throw_on_no_metric_at_all(tmp_dir, scm, dvc): @@ -193,11 +239,66 @@ def test_throw_on_no_metric_at_all(tmp_dir, scm, dvc): ) scm.tag("v2") - (tmp_dir / "template.dvct").write_text("") - # TODO create exception with pytest.raises(DvcException): - dvc.plot("template.dvct", revisions=["v2", "v1"]) + dvc.plot("metric.json", revisions=["v2", "v1"]) + + +def test_custom_template(tmp_dir, scm, dvc): + shutil.copy( + fspath(tmp_dir / ".dvc" / "plot" / "default.dvct"), + fspath(tmp_dir / "newtemplate.dvct"), + ) + + metric = [{"a": 1, "b": 2}, {"a": 2, "b": 3}] + _write_json(tmp_dir, metric, "metric.json") + _run_with_metric(tmp_dir, "metric.json", "init", "v1") + + result = dvc.plot("metric.json", "newtemplate.dvct") + + page_content = BeautifulSoup((tmp_dir / result).read_text()) + vega_data = json.dumps( + [ + {"a": 1, "b": 2, "revision": "current workspace"}, + {"a": 2, "b": 3, "revision": "current workspace"}, + ], + indent=4, + separators=(",", ": "), + ) + assert vega_data in first(page_content.body.script.contents) + + +def _replace(path, src, dst): + path.write_text(path.read_text().replace(src, dst)) + + +def test_custom_template_with_specified_data(tmp_dir, scm, dvc): + shutil.copy( + fspath(tmp_dir / ".dvc" / "plot" / "default.dvct"), + fspath(tmp_dir / "newtemplate.dvct"), + ) + _replace( + tmp_dir / "newtemplate.dvct", + "DVC_METRIC_DATA", + "DVC_METRIC_DATA::metric.json", + ) + + metric = [{"a": 1, "b": 2}, {"a": 2, "b": 3}] + _write_json(tmp_dir, metric, "metric.json") + _run_with_metric(tmp_dir, "metric.json", "init", "v1") + + result = dvc.plot(datafile=None, template="newtemplate.dvct") + + page_content = BeautifulSoup((tmp_dir / result).read_text()) + vega_data = json.dumps( + [ + {"a": 1, "b": 2, "revision": "current workspace"}, + {"a": 2, "b": 3, "revision": "current workspace"}, + ], + indent=4, + separators=(",", ": "), + ) + assert vega_data in first(page_content.body.script.contents) # TODO From 8cd3be9f9a8569d1f1ece86a20f43bc12f507ec1 Mon Sep 17 00:00:00 2001 From: pawel Date: Wed, 8 Apr 2020 20:43:51 +0200 Subject: [PATCH 023/102] support for src file in dvct files --- dvc/plot.py | 72 ++++++++++++++++++++++++++++++----------- dvc/repo/plot.py | 20 ++++++------ tests/func/test_plot.py | 3 +- 3 files changed, 65 insertions(+), 30 deletions(-) diff --git a/dvc/plot.py b/dvc/plot.py index 4768cd587e..c9397b3574 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -90,33 +90,67 @@ def load_template(self, path): raise DvcException("Not in repo nor in defaults") @staticmethod - def parse_data_placeholders(template_path): - regex = re.compile("") + def get_data_placeholders(template_path): + regex = re.compile('""') with open(template_path, "r") as fobj: template_content = fobj.read() - matches = regex.findall(template_content) - data_files = [ - m.replace("<", "") + return regex.findall(template_content) + + @staticmethod + def parse_data_placeholders(template_path): + data_files = { + Template.get_datafile(m) + for m in Template.get_data_placeholders(template_path) + } + return {df for df in data_files if df} + + @staticmethod + def get_datafile(placeholder_string): + return ( + placeholder_string.replace("<", "") + .replace('"', "") .replace(">", "") - .replace("DVC_METRIC_DATA::", "") - for m in matches - ] - return data_files + .replace("DVC_METRIC_DATA", "") + .replace("::", "") + ) @staticmethod - def fill(template_path, data): + def fill(template_path, data, priority_datafile=None): + result_path = os.path.basename(template_path).replace(".dvct", ".html") + with open(template_path, "r") as fobj: - template_str = fobj.read() - regex = re.compile('""') - matches = regex.findall(template_str) + result_content = fobj.read() - result_path = os.path.basename(template_path).replace(".dvct", ".html") - result_content = template_str.replace( - matches[0], - json.dumps( + template_placeholders = Template.get_data_placeholders(template_path) + if priority_datafile and len(template_placeholders) > 1: + raise DvcException("Dont know which datafile to ovveride") # Todo + + def dump(data): + return json.dumps( data, indent=Template.INDENT, separators=Template.SEPARATORS - ), - ) + ) + + for placeholder in Template.get_data_placeholders(template_path): + file = Template.get_datafile(placeholder) + if not file or priority_datafile: + to_dump = data[priority_datafile] + else: + to_dump = data[file] + result_content = result_content.replace( + placeholder, + json.dumps( + to_dump, + indent=Template.INDENT, + separators=Template.SEPARATORS, + ), + ) + + # result_content = template_str.replace( + # matches[0], + # json.dumps( + # data, indent=Template.INDENT, separators=Template.SEPARATORS + # ), + # ) with open(result_path, "w") as fobj: fobj.write(result_content) return result_path diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 82eea2917a..21ddf1b6b0 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -68,7 +68,6 @@ def _load_from_revision(repo, datafile, revision=None, default_plot=False): def _load_from_revisions(repo, datafile, revisions, default_plot=False): - # TODO those _load_from_revision calls data = [] if len(revisions) == 0: if repo.scm.is_dirty(): @@ -117,10 +116,6 @@ def _evaluate_templatepath(repo, template): raise DvcException("No template found") -def _parse_template(path): - pass - - @locked def plot(repo, datafile=None, template=None, revisions=None): default_plot = False @@ -138,11 +133,16 @@ def plot(repo, datafile=None, template=None, revisions=None): if revisions is None: revisions = [] - # load datafiles from template - # TODO templatepath from templatefile - # datafiles = _parse_template(template_path) - data = _load_from_revisions(repo, datafile, revisions, default_plot) - result_path = Template.fill(template_path, data) + template_datafiles = Template.parse_data_placeholders(template_path) + if datafile: + template_datafiles.add(datafile) + + data = { + datafile: _load_from_revisions(repo, datafile, revisions, default_plot) + for datafile in template_datafiles + } + + result_path = Template.fill(template_path, data, datafile) logger.info( "Your can see your plot by opening {} in your " "browser!".format( diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 9584c4179b..1ac3bd2fc2 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -302,7 +302,8 @@ def test_custom_template_with_specified_data(tmp_dir, scm, dvc): # TODO +# test for pure json template # def test_plot_in_html_file(tmp_dir): # def test_plot_override_data_file -# def test_plot_custom_template_file +# def test_plot_custom_template_file / test_ # def test_plot_multiple_plots From 8978a63e14588b0e6c155b3d2eef7719a36ce5bc Mon Sep 17 00:00:00 2001 From: pawel Date: Wed, 8 Apr 2020 20:58:54 +0200 Subject: [PATCH 024/102] minor fixes --- dvc/plot.py | 10 ++------ dvc/repo/plot.py | 5 ++-- tests/func/test_plot.py | 57 +++++++++++++++++------------------------ 3 files changed, 28 insertions(+), 44 deletions(-) diff --git a/dvc/plot.py b/dvc/plot.py index c9397b3574..7325f79078 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -69,12 +69,6 @@ def dump(self): self.plot_templates_dir, self.TEMPLATE_NAME + self.EXTENTION ), ) - # json.dump( - # self.DEFAULT_CONTENT, - # fd, - # indent=self.INDENT, - # separators=self.SEPARATORS, - # ) def load_template(self, path): try: @@ -166,7 +160,7 @@ class DefaultLinearTemplate(Template): "encoding": { "x": {"field": "x", "type": "quantitative"}, "y": {"field": "y", "type": "quantitative"}, - "color": {"field": "revision", "type": "nominal"}, + "color": {"field": "rev", "type": "nominal"}, }, } @@ -185,7 +179,7 @@ class DefaultConfusionTemplate(Template): }, "y": {"field": "actual", "type": "nominal", "sort": "ascending"}, "color": {"aggregate": "count", "type": "quantitative"}, - "facet": {"field": "revision", "type": "nominal"}, + "facet": {"field": "rev", "type": "nominal"}, }, } diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 21ddf1b6b0..bc2744b921 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -49,7 +49,7 @@ def _load_from_tree(tree, datafile, default_plot=False): def _load_from_revision(repo, datafile, revision=None, default_plot=False): if revision is None: - revision = "current workspace" + revision = "current" tree = repo.tree else: tree = repo.scm.get_tree(revision) @@ -57,7 +57,7 @@ def _load_from_revision(repo, datafile, revision=None, default_plot=False): try: data = _load_from_tree(tree, datafile, default_plot) for d in data: - d["revision"] = revision + d["rev"] = revision except FileNotFoundError: logger.warning( "File '{}' was not found at: '{}'. It will not be " @@ -71,6 +71,7 @@ def _load_from_revisions(repo, datafile, revisions, default_plot=False): data = [] if len(revisions) == 0: if repo.scm.is_dirty(): + logger.warning("Repo is dirty, extending with HEAD data") data.extend( _load_from_revision(repo, datafile, "HEAD", default_plot) ) diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 1ac3bd2fc2..4a66c240e4 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -51,8 +51,8 @@ def test_plot_csv_one_column(tmp_dir, scm, dvc): vega_data = json.dumps( [ # TODO csv reads as strings, what to do with that? - {"y": "2", "x": 0, "revision": "current workspace"}, - {"y": "3", "x": 1, "revision": "current workspace"}, + {"y": "2", "x": 0, "rev": "current"}, + {"y": "3", "x": 1, "rev": "current"}, ], indent=4, separators=(",", ": "), @@ -70,8 +70,8 @@ def test_plot_csv_multiple_columns(tmp_dir, scm, dvc): vega_data = json.dumps( [ # header was skipped so index starts at 1 - {"y": "2", "x": 1, "revision": "current workspace"}, - {"y": "3", "x": 2, "revision": "current workspace"}, + {"y": "2", "x": 1, "rev": "current"}, + {"y": "3", "x": 2, "rev": "current"}, ], indent=4, separators=(",", ": "), @@ -89,8 +89,8 @@ def test_plot_json_single_val(tmp_dir, scm, dvc): page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( [ - {"y": 2, "x": 0, "revision": "current workspace"}, - {"y": 3, "x": 1, "revision": "current workspace"}, + {"y": 2, "x": 0, "rev": "current"}, + {"y": 3, "x": 1, "rev": "current"}, ], indent=4, separators=(",", ": "), @@ -108,8 +108,8 @@ def test_plot_json_multiple_val(tmp_dir, scm, dvc): page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( [ - {"y": 2, "x": 0, "revision": "current workspace"}, - {"y": 3, "x": 1, "revision": "current workspace"}, + {"y": 2, "x": 0, "rev": "current"}, + {"y": 3, "x": 1, "rev": "current"}, ], indent=4, separators=(",", ": "), @@ -135,16 +135,8 @@ def test_plot_confusion(tmp_dir, dvc): "$schema": "https://vega.github.io/schema/vega-lite/v4.json", "data": { "values": [ - { - "predicted": "B", - "actual": "A", - "revision": "current workspace", - }, - { - "predicted": "A", - "actual": "A", - "revision": "current workspace", - }, + {"predicted": "B", "actual": "A", "rev": "current"}, + {"predicted": "A", "actual": "A", "rev": "current"}, ] }, "mark": "rect", @@ -160,7 +152,7 @@ def test_plot_confusion(tmp_dir, dvc): "sort": "ascending", }, "color": {"aggregate": "count", "type": "quantitative"}, - "facet": {"field": "revision", "type": "nominal"}, + "facet": {"field": "rev", "type": "nominal"}, }, "title": "metric.json", }, @@ -169,7 +161,7 @@ def test_plot_confusion(tmp_dir, dvc): ) in first(page_content.body.script.contents) -def test_plot_multiple_revisions(tmp_dir, scm, dvc): +def test_plot_multiple_revs(tmp_dir, scm, dvc): metric_1 = [{"x": 1, "y": 2}, {"x": 2, "y": 3}] _write_json(tmp_dir, metric_1, "metric.json") _run_with_metric(tmp_dir, "metric.json", "init", "v1") @@ -187,12 +179,12 @@ def test_plot_multiple_revisions(tmp_dir, scm, dvc): content = BeautifulSoup((tmp_dir / "default.html").read_text()) vega_data = json.dumps( [ - {"y": 5, "x": 0, "revision": "HEAD"}, - {"y": 6, "x": 1, "revision": "HEAD"}, - {"y": 3, "x": 0, "revision": "v2"}, - {"y": 5, "x": 1, "revision": "v2"}, - {"y": 2, "x": 0, "revision": "v1"}, - {"y": 3, "x": 1, "revision": "v1"}, + {"y": 5, "x": 0, "rev": "HEAD"}, + {"y": 6, "x": 1, "rev": "HEAD"}, + {"y": 3, "x": 0, "rev": "v2"}, + {"y": 5, "x": 1, "rev": "v2"}, + {"y": 2, "x": 0, "rev": "v1"}, + {"y": 3, "x": 1, "rev": "v1"}, ], indent=4, separators=(",", ": "), @@ -218,10 +210,7 @@ def test_plot_even_if_metric_missing(tmp_dir, scm, dvc, caplog): page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( - [ - {"y": 2, "x": 0, "revision": "v2"}, - {"y": 3, "x": 1, "revision": "v2"}, - ], + [{"y": 2, "x": 0, "rev": "v2"}, {"y": 3, "x": 1, "rev": "v2"}], indent=4, separators=(",", ": "), ) @@ -259,8 +248,8 @@ def test_custom_template(tmp_dir, scm, dvc): page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( [ - {"a": 1, "b": 2, "revision": "current workspace"}, - {"a": 2, "b": 3, "revision": "current workspace"}, + {"a": 1, "b": 2, "rev": "current"}, + {"a": 2, "b": 3, "rev": "current"}, ], indent=4, separators=(",", ": "), @@ -292,8 +281,8 @@ def test_custom_template_with_specified_data(tmp_dir, scm, dvc): page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( [ - {"a": 1, "b": 2, "revision": "current workspace"}, - {"a": 2, "b": 3, "revision": "current workspace"}, + {"a": 1, "b": 2, "rev": "current"}, + {"a": 2, "b": 3, "rev": "current"}, ], indent=4, separators=(",", ": "), From 0a3df45ec9850cf3f92c40757f7f783f2651ad46 Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 9 Apr 2020 13:05:45 +0200 Subject: [PATCH 025/102] plot: support json templates --- dvc/plot.py | 55 +++++++++----- dvc/repo/plot.py | 100 ++++++++++++------------- tests/func/test_plot.py | 158 ++++++++++++++++++++++++---------------- 3 files changed, 179 insertions(+), 134 deletions(-) diff --git a/dvc/plot.py b/dvc/plot.py index 7325f79078..c7c5f00a8b 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -4,6 +4,7 @@ import random import re import string +from json import JSONDecodeError from funcy import cached_property @@ -46,6 +47,18 @@ def _prepare_div(vega_dict): ) +def _embed(content): + if "" in content: + return content + + try: + vega_dict = json.loads(content) + except JSONDecodeError: + # TODO + raise DvcException("Not html, nor json") + return PAGE_HTML.format(divs=_prepare_div([vega_dict])) + + class Template: INDENT = 4 SEPARATORS = (",", ": ") @@ -85,7 +98,7 @@ def load_template(self, path): @staticmethod def get_data_placeholders(template_path): - regex = re.compile('""') + regex = re.compile('""]*>"') with open(template_path, "r") as fobj: template_content = fobj.read() return regex.findall(template_content) @@ -102,10 +115,10 @@ def parse_data_placeholders(template_path): def get_datafile(placeholder_string): return ( placeholder_string.replace("<", "") - .replace('"', "") .replace(">", "") + .replace('"', "") .replace("DVC_METRIC_DATA", "") - .replace("::", "") + .replace(",", "") ) @staticmethod @@ -115,15 +128,6 @@ def fill(template_path, data, priority_datafile=None): with open(template_path, "r") as fobj: result_content = fobj.read() - template_placeholders = Template.get_data_placeholders(template_path) - if priority_datafile and len(template_placeholders) > 1: - raise DvcException("Dont know which datafile to ovveride") # Todo - - def dump(data): - return json.dumps( - data, indent=Template.INDENT, separators=Template.SEPARATORS - ) - for placeholder in Template.get_data_placeholders(template_path): file = Template.get_datafile(placeholder) if not file or priority_datafile: @@ -139,12 +143,8 @@ def dump(data): ), ) - # result_content = template_str.replace( - # matches[0], - # json.dumps( - # data, indent=Template.INDENT, separators=Template.SEPARATORS - # ), - # ) + result_content = _embed(result_content) + with open(result_path, "w") as fobj: fobj.write(result_content) return result_path @@ -185,6 +185,7 @@ class DefaultConfusionTemplate(Template): class PlotTemplates: + # TODO os.path? check whether it should not be repo.tree TEMPLATES_DIR = "plot" TEMPLATES = [DefaultLinearTemplate, DefaultConfusionTemplate] @@ -192,6 +193,24 @@ class PlotTemplates: def templates_dir(self): return os.path.join(self.dvc_dir, self.TEMPLATES_DIR) + @cached_property + def default_template(self): + return os.path.join(self.templates_dir, "default.dvct") + + def get_template(self, path): + t_path = os.path.join(self.templates_dir, path) + if os.path.exists(t_path): + return t_path + else: + regex = re.compile(t_path + ".*") + for root, d, fs in os.walk(self.templates_dir): + for f in fs: + path = os.path.join(root, f) + if regex.findall(path): + return path + + raise DvcException("Template not found") + def __init__(self, dvc_dir): self.dvc_dir = dvc_dir diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index bc2744b921..5e2e7230cc 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -2,14 +2,12 @@ import json import logging import os -import re from funcy import first from dvc.exceptions import DvcException from dvc.plot import Template from dvc.repo import locked -from dvc.utils import format_link logger = logging.getLogger(__name__) @@ -18,32 +16,42 @@ def _all_dict_of_length_one(data): return all([isinstance(e, dict) and len(e) == 1 for e in data]) +# TODO test parsing def _load_from_tree(tree, datafile, default_plot=False): if datafile.endswith(".json"): - with tree.open(datafile, "r") as fobj: - data = json.load(fobj) - assert isinstance(data, list) + data = _parse_json(datafile, default_plot, tree) - if default_plot: - assert all(len(e) >= 1 for e in data) - last_key = list(first(data).keys())[-1] - data = [{"y": d[last_key], "x": i} for i, d in enumerate(data)] elif datafile.endswith(".csv"): - with tree.open(datafile, "r") as fobj: - if default_plot: - data = [] - for index, row in enumerate(csv.reader(fobj)): - assert len(row) >= 1 - if index == 0 and len(row) > 1: - # skip header - continue - data.append({"y": row[-1], "x": index}) - else: - data = [ - row - for row in (csv.DictReader(fobj, skipinitialspace=True)) - ] + data = _parse_csv(datafile, default_plot, tree) + + return data + + +def _parse_csv(datafile, default_plot, tree): + with tree.open(datafile, "r") as fobj: + if default_plot: + data = [] + for index, row in enumerate(csv.reader(fobj)): + assert len(row) >= 1 + if index == 0 and len(row) > 1: + # skip header + continue + data.append({"y": row[-1], "x": index}) + else: + data = [ + row for row in (csv.DictReader(fobj, skipinitialspace=True)) + ] + return data + +def _parse_json(datafile, default_plot, tree): + with tree.open(datafile, "r") as fobj: + data = json.load(fobj) + assert isinstance(data, list) + if default_plot: + assert all(len(e) >= 1 for e in data) + last_key = list(first(data).keys())[-1] + data = [{"y": d[last_key], "x": i} for i, d in enumerate(data)] return data @@ -68,8 +76,10 @@ def _load_from_revision(repo, datafile, revision=None, default_plot=False): def _load_from_revisions(repo, datafile, revisions, default_plot=False): + # TODO test data = [] if len(revisions) == 0: + # TODO implement status for file if repo.scm.is_dirty(): logger.warning("Repo is dirty, extending with HEAD data") data.extend( @@ -99,44 +109,38 @@ def _load_from_revisions(repo, datafile, revisions, default_plot=False): def _evaluate_templatepath(repo, template): + # TODO test if os.path.exists(template): return template else: - # TODO - logger.debug("Template '{}' not found, checking in plot dir.") - plots_dir_path = os.path.join( - repo.plot_templates.templates_dir, template - ) - if os.path.exists(plots_dir_path): - return plots_dir_path - else: - regex = re.compile(template + ".*") - for t in os.listdir(repo.plot_templates.templates_dir): - if regex.match(t): - return os.path.join(repo.plot_templates.templates_dir, t) - raise DvcException("No template found") + return repo.plot_templates.get_template(template) @locked def plot(repo, datafile=None, template=None, revisions=None): - default_plot = False - if template is None: - template_path = os.path.join( - repo.plot_templates.templates_dir, "default.dvct" - ) - default_plot = True + template_path = repo.plot_templates.default_template else: template_path = _evaluate_templatepath(repo, template) # TODO exception assert template_path.endswith(".dvct") + default_plot = ( + True + if template_path == repo.plot_templates.default_template + else False + ) + if revisions is None: revisions = [] template_datafiles = Template.parse_data_placeholders(template_path) + if datafile: - template_datafiles.add(datafile) + if len(template_datafiles) > 1: + # TODO + raise DvcException("Don't know which datafile to replace") + template_datafiles = {datafile} data = { datafile: _load_from_revisions(repo, datafile, revisions, default_plot) @@ -144,13 +148,5 @@ def plot(repo, datafile=None, template=None, revisions=None): } result_path = Template.fill(template_path, data, datafile) - logger.info( - "Your can see your plot by opening {} in your " - "browser!".format( - format_link( - "file://{}".format(os.path.join(repo.root_dir, result_path)) - ) - ) - ) + logger.info("file://{}".format(os.path.join(repo.root_dir, result_path))) return result_path - # replace DVC_PLOT_DATA in template w diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 4a66c240e4..5fb287e15a 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -9,6 +9,11 @@ from dvc.compat import fspath from dvc.exceptions import DvcException +from dvc.plot import DefaultLinearTemplate + + +def _remove_whitespace(value): + return value.replace(" ", "").replace("\n", "") def _run_with_metric(tmp_dir, metric_filename, commit=None, tag=None): @@ -54,10 +59,10 @@ def test_plot_csv_one_column(tmp_dir, scm, dvc): {"y": "2", "x": 0, "rev": "current"}, {"y": "3", "x": 1, "rev": "current"}, ], - indent=4, - separators=(",", ": "), ) - assert vega_data in first(page_content.body.script.contents) + assert _remove_whitespace(vega_data) in _remove_whitespace( + first(page_content.body.script.contents) + ) def test_plot_csv_multiple_columns(tmp_dir, scm, dvc): @@ -73,10 +78,10 @@ def test_plot_csv_multiple_columns(tmp_dir, scm, dvc): {"y": "2", "x": 1, "rev": "current"}, {"y": "3", "x": 2, "rev": "current"}, ], - indent=4, - separators=(",", ": "), ) - assert vega_data in first(page_content.body.script.contents) + assert _remove_whitespace(vega_data) in _remove_whitespace( + first(page_content.body.script.contents) + ) def test_plot_json_single_val(tmp_dir, scm, dvc): @@ -92,10 +97,10 @@ def test_plot_json_single_val(tmp_dir, scm, dvc): {"y": 2, "x": 0, "rev": "current"}, {"y": 3, "x": 1, "rev": "current"}, ], - indent=4, - separators=(",", ": "), ) - assert vega_data in first(page_content.body.script.contents) + assert _remove_whitespace(vega_data) in _remove_whitespace( + first(page_content.body.script.contents) + ) def test_plot_json_multiple_val(tmp_dir, scm, dvc): @@ -111,54 +116,32 @@ def test_plot_json_multiple_val(tmp_dir, scm, dvc): {"y": 2, "x": 0, "rev": "current"}, {"y": 3, "x": 1, "rev": "current"}, ], - indent=4, - separators=(",", ": "), ) - assert vega_data in first(page_content.body.script.contents) + assert _remove_whitespace(vega_data) in _remove_whitespace( + first(page_content.body.script.contents) + ) -@pytest.mark.skip def test_plot_confusion(tmp_dir, dvc): confusion_matrix = [ {"predicted": "B", "actual": "A"}, {"predicted": "A", "actual": "A"}, ] - _run_with_metric(tmp_dir, confusion_matrix, "metric.json", "first run") - template_content = "" - (tmp_dir / "template.dvct").write_text(template_content) + _write_json(tmp_dir, confusion_matrix, "metric.json") + _run_with_metric(tmp_dir, "metric.json", "first run") - result = dvc.plot_template("template.dvct") + result = dvc.plot(datafile="metric.json", template="cf") page_content = BeautifulSoup((tmp_dir / result).read_text()) - assert json.dumps( - { - "$schema": "https://vega.github.io/schema/vega-lite/v4.json", - "data": { - "values": [ - {"predicted": "B", "actual": "A", "rev": "current"}, - {"predicted": "A", "actual": "A", "rev": "current"}, - ] - }, - "mark": "rect", - "encoding": { - "x": { - "field": "predicted", - "type": "nominal", - "sort": "ascending", - }, - "y": { - "field": "actual", - "type": "nominal", - "sort": "ascending", - }, - "color": {"aggregate": "count", "type": "quantitative"}, - "facet": {"field": "rev", "type": "nominal"}, - }, - "title": "metric.json", - }, - indent=4, - separators=(",", ": "), - ) in first(page_content.body.script.contents) + vega_data = json.dumps( + [ + {"predicted": "B", "actual": "A", "rev": "current"}, + {"predicted": "A", "actual": "A", "rev": "current"}, + ], + ) + assert _remove_whitespace(vega_data) in _remove_whitespace( + first(page_content.body.script.contents) + ) def test_plot_multiple_revs(tmp_dir, scm, dvc): @@ -176,7 +159,7 @@ def test_plot_multiple_revs(tmp_dir, scm, dvc): dvc.plot("metric.json", revisions=["HEAD", "v2", "v1"]) - content = BeautifulSoup((tmp_dir / "default.html").read_text()) + page_content = BeautifulSoup((tmp_dir / "default.html").read_text()) vega_data = json.dumps( [ {"y": 5, "x": 0, "rev": "HEAD"}, @@ -189,7 +172,9 @@ def test_plot_multiple_revs(tmp_dir, scm, dvc): indent=4, separators=(",", ": "), ) - assert vega_data in first(content.body.script.contents) + assert _remove_whitespace(vega_data) in _remove_whitespace( + first(page_content.body.script.contents) + ) def test_plot_even_if_metric_missing(tmp_dir, scm, dvc, caplog): @@ -211,10 +196,10 @@ def test_plot_even_if_metric_missing(tmp_dir, scm, dvc, caplog): page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( [{"y": 2, "x": 0, "rev": "v2"}, {"y": 3, "x": 1, "rev": "v2"}], - indent=4, - separators=(",", ": "), ) - assert vega_data in first(page_content.body.script.contents) + assert _remove_whitespace(vega_data) in _remove_whitespace( + first(page_content.body.script.contents) + ) def test_throw_on_no_metric_at_all(tmp_dir, scm, dvc): @@ -251,10 +236,10 @@ def test_custom_template(tmp_dir, scm, dvc): {"a": 1, "b": 2, "rev": "current"}, {"a": 2, "b": 3, "rev": "current"}, ], - indent=4, - separators=(",", ": "), ) - assert vega_data in first(page_content.body.script.contents) + assert _remove_whitespace(vega_data) in _remove_whitespace( + first(page_content.body.script.contents) + ) def _replace(path, src, dst): @@ -269,7 +254,7 @@ def test_custom_template_with_specified_data(tmp_dir, scm, dvc): _replace( tmp_dir / "newtemplate.dvct", "DVC_METRIC_DATA", - "DVC_METRIC_DATA::metric.json", + "DVC_METRIC_DATA,metric.json", ) metric = [{"a": 1, "b": 2}, {"a": 2, "b": 3}] @@ -284,15 +269,60 @@ def test_custom_template_with_specified_data(tmp_dir, scm, dvc): {"a": 1, "b": 2, "rev": "current"}, {"a": 2, "b": 3, "rev": "current"}, ], - indent=4, - separators=(",", ": "), ) - assert vega_data in first(page_content.body.script.contents) + assert _remove_whitespace(vega_data) in _remove_whitespace( + first(page_content.body.script.contents) + ) -# TODO -# test for pure json template -# def test_plot_in_html_file(tmp_dir): -# def test_plot_override_data_file -# def test_plot_custom_template_file / test_ -# def test_plot_multiple_plots +def test_plot_override_specified_data_source(tmp_dir, scm, dvc): + shutil.copy( + fspath(tmp_dir / ".dvc" / "plot" / "default.dvct"), + fspath(tmp_dir / "newtemplate.dvct"), + ) + _replace( + tmp_dir / "newtemplate.dvct", + "DVC_METRIC_DATA", + "DVC_METRIC_DATA,metric.json", + ) + + metric = [{"a": 1, "b": 2}, {"a": 2, "b": 3}] + _write_json(tmp_dir, metric, "metric2.json") + _run_with_metric(tmp_dir, "metric2.json", "init", "v1") + + result = dvc.plot(datafile="metric2.json", template="newtemplate.dvct") + + page_content = BeautifulSoup((tmp_dir / result).read_text()) + vega_data = json.dumps( + [ + {"a": 1, "b": 2, "rev": "current"}, + {"a": 2, "b": 3, "rev": "current"}, + ], + ) + assert _remove_whitespace(vega_data) in _remove_whitespace( + first(page_content.body.script.contents) + ) + + +def test_should_embed_vega_json_template(tmp_dir, scm, dvc): + template = DefaultLinearTemplate.DEFAULT_CONTENT + template["data"] = {"values": ""} + + (tmp_dir / "template.dvct").write_text(json.dumps(template)) + + metric = [{"x": 1, "y": 2}, {"x": 2, "y": 3}] + _write_json(tmp_dir, metric, "metric.json") + _run_with_metric(tmp_dir, "metric.json", "init", "v1") + + result = dvc.plot("metric.json", "template.dvct") + + page_content = BeautifulSoup((tmp_dir / result).read_text()) + vega_data = json.dumps( + [ + {"x": 1, "y": 2, "rev": "current"}, + {"x": 2, "y": 3, "rev": "current"}, + ], + ) + assert _remove_whitespace(vega_data) in _remove_whitespace( + first(page_content.body.script.contents) + ) From c9209520fb32fff7801f0e1763c2caadaae5df0c Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 9 Apr 2020 13:21:27 +0200 Subject: [PATCH 026/102] plot: rename confusion template --- dvc/plot.py | 2 +- tests/func/test_plot.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dvc/plot.py b/dvc/plot.py index c7c5f00a8b..fedf8fca28 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -166,7 +166,7 @@ class DefaultLinearTemplate(Template): class DefaultConfusionTemplate(Template): - TEMPLATE_NAME = "cf" + TEMPLATE_NAME = "confusion_matrix" DEFAULT_CONTENT = { "$schema": "https://vega.github.io/schema/vega-lite/v4.json", "data": {"values": Template.METRIC_DATA_STRING}, diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 5fb287e15a..85a252d801 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -130,7 +130,7 @@ def test_plot_confusion(tmp_dir, dvc): _write_json(tmp_dir, confusion_matrix, "metric.json") _run_with_metric(tmp_dir, "metric.json", "first run") - result = dvc.plot(datafile="metric.json", template="cf") + result = dvc.plot(datafile="metric.json", template="confusion") page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( From b78f43cdf28a64829a437c94bfbabca3490a2716 Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 9 Apr 2020 14:42:16 +0200 Subject: [PATCH 027/102] plot: polish command behaviour --- dvc/command/plot.py | 4 ++-- dvc/repo/plot.py | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 09aa4cf88e..413b29c701 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -96,8 +96,8 @@ def add_parser(subparsers, parent_parser): ) plot_diff_parser.add_argument( "revisions", - nargs="+", - default=[], + nargs="*", + default=None, help=("Git revisions to plot from"), ) plot_diff_parser.set_defaults(func=CmdPlotDiff) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 5e2e7230cc..eacfb19cc0 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -81,7 +81,6 @@ def _load_from_revisions(repo, datafile, revisions, default_plot=False): if len(revisions) == 0: # TODO implement status for file if repo.scm.is_dirty(): - logger.warning("Repo is dirty, extending with HEAD data") data.extend( _load_from_revision(repo, datafile, "HEAD", default_plot) ) From 4a675d3e573c9658a7551ea25b73ad39905d51cd Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 10 Apr 2020 09:59:26 +0200 Subject: [PATCH 028/102] fix test for json --- dvc/plot.py | 15 --------------- tests/func/test_plot.py | 17 +++++++---------- 2 files changed, 7 insertions(+), 25 deletions(-) diff --git a/dvc/plot.py b/dvc/plot.py index fedf8fca28..1dd918acd4 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -4,7 +4,6 @@ import random import re import string -from json import JSONDecodeError from funcy import cached_property @@ -47,18 +46,6 @@ def _prepare_div(vega_dict): ) -def _embed(content): - if "" in content: - return content - - try: - vega_dict = json.loads(content) - except JSONDecodeError: - # TODO - raise DvcException("Not html, nor json") - return PAGE_HTML.format(divs=_prepare_div([vega_dict])) - - class Template: INDENT = 4 SEPARATORS = (",", ": ") @@ -143,8 +130,6 @@ def fill(template_path, data, priority_datafile=None): ), ) - result_content = _embed(result_content) - with open(result_path, "w") as fobj: fobj.write(result_content) return result_path diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 85a252d801..7804aa64f1 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -316,13 +316,10 @@ def test_should_embed_vega_json_template(tmp_dir, scm, dvc): result = dvc.plot("metric.json", "template.dvct") - page_content = BeautifulSoup((tmp_dir / result).read_text()) - vega_data = json.dumps( - [ - {"x": 1, "y": 2, "rev": "current"}, - {"x": 2, "y": 3, "rev": "current"}, - ], - ) - assert _remove_whitespace(vega_data) in _remove_whitespace( - first(page_content.body.script.contents) - ) + result_content = json.loads((tmp_dir / result).read_text()) + vega_data = [ + {"x": 1, "y": 2, "rev": "current"}, + {"x": 2, "y": 3, "rev": "current"}, + ] + + assert vega_data == result_content["data"]["values"] From ec81363970750669a6a9326bbf454ebca2c18e0f Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 10 Apr 2020 11:01:45 +0200 Subject: [PATCH 029/102] plot: test command --- dvc/command/plot.py | 23 +++++++++++-- dvc/plot.py | 6 ++-- dvc/repo/plot.py | 7 ++-- tests/func/test_plot.py | 4 +-- tests/unit/command/test_plot.py | 58 +++++++++++++++++++++++++++++++++ 5 files changed, 87 insertions(+), 11 deletions(-) create mode 100644 tests/unit/command/test_plot.py diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 413b29c701..ee296d1933 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -11,11 +11,15 @@ class CmdPlotShow(CmdBase): def run(self): try: - # TODO overriding datafile functionality - self.repo.plot(self.args.datafile, self.args.template) + self.repo.plot( + datafile=self.args.datafile, + template=self.args.template, + file=self.args.file, + ) except DvcException: logger.exception("failed to plot metrics") + return 1 return 0 @@ -26,6 +30,7 @@ def run(self): self.args.datafile, self.args.template, revisions=self.args.revisions, + file=self.args.file, ) except DvcException: @@ -63,7 +68,14 @@ def add_parser(subparsers, parent_parser): formatter_class=argparse.RawDescriptionHelpFormatter, ) plot_show_parser.add_argument( - "--template", nargs="?", default=None, help="dvct file to visualize." + "-f", "--file", help="Specify name of the " "file it generates." + ) + plot_show_parser.add_argument( + "-t", + "--template", + nargs="?", + default=None, + help="dvct file to " "visualize.", ) plot_show_parser.add_argument( "datafile", @@ -83,12 +95,17 @@ def add_parser(subparsers, parent_parser): formatter_class=argparse.RawDescriptionHelpFormatter, ) plot_diff_parser.add_argument( + "-f", "--file", help="Specify name of the " "file it generates." + ) + plot_diff_parser.add_argument( + "-t", "--template", nargs="?", default=None, help=("dvct template file to " "process."), ) plot_diff_parser.add_argument( + "-d", "--datafile", nargs="?", default=None, diff --git a/dvc/plot.py b/dvc/plot.py index 1dd918acd4..988fa94215 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -109,8 +109,9 @@ def get_datafile(placeholder_string): ) @staticmethod - def fill(template_path, data, priority_datafile=None): - result_path = os.path.basename(template_path).replace(".dvct", ".html") + def fill(template_path, data, priority_datafile=None, result_path=None): + if not result_path: + result_path = os.path.basename(template_path) + ".html" with open(template_path, "r") as fobj: result_content = fobj.read() @@ -170,7 +171,6 @@ class DefaultConfusionTemplate(Template): class PlotTemplates: - # TODO os.path? check whether it should not be repo.tree TEMPLATES_DIR = "plot" TEMPLATES = [DefaultLinearTemplate, DefaultConfusionTemplate] diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index eacfb19cc0..434444f7d0 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -108,7 +108,6 @@ def _load_from_revisions(repo, datafile, revisions, default_plot=False): def _evaluate_templatepath(repo, template): - # TODO test if os.path.exists(template): return template else: @@ -116,7 +115,7 @@ def _evaluate_templatepath(repo, template): @locked -def plot(repo, datafile=None, template=None, revisions=None): +def plot(repo, datafile=None, template=None, revisions=None, file=None): if template is None: template_path = repo.plot_templates.default_template else: @@ -146,6 +145,8 @@ def plot(repo, datafile=None, template=None, revisions=None): for datafile in template_datafiles } - result_path = Template.fill(template_path, data, datafile) + result_path = Template.fill( + template_path, data, datafile, result_path=file + ) logger.info("file://{}".format(os.path.join(repo.root_dir, result_path))) return result_path diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 7804aa64f1..deaab42531 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -157,9 +157,9 @@ def test_plot_multiple_revs(tmp_dir, scm, dvc): _write_json(tmp_dir, metric_3, "metric.json") _run_with_metric(tmp_dir, "metric.json", "third") - dvc.plot("metric.json", revisions=["HEAD", "v2", "v1"]) + result = dvc.plot("metric.json", revisions=["HEAD", "v2", "v1"]) - page_content = BeautifulSoup((tmp_dir / "default.html").read_text()) + page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( [ {"y": 5, "x": 0, "rev": "HEAD"}, diff --git a/tests/unit/command/test_plot.py b/tests/unit/command/test_plot.py new file mode 100644 index 0000000000..1765b91edd --- /dev/null +++ b/tests/unit/command/test_plot.py @@ -0,0 +1,58 @@ +from dvc.cli import parse_args +from dvc.command.plot import CmdPlotShow, CmdPlotDiff + + +def test_metrics_diff(dvc, mocker): + cli_args = parse_args( + [ + "plot", + "diff", + "-f", + "result.extension", + "-t", + "template", + "-d" "datafile", + "HEAD", + "tag1", + "tag2", + ] + ) + assert cli_args.func == CmdPlotDiff + + cmd = cli_args.func(cli_args) + + m = mocker.patch.object(cmd.repo, "plot", autospec=True) + + assert cmd.run() == 0 + + m.assert_called_once_with( + datafile="datafile", + template="template", + file="result.extension", + revisions=["HEAD", "tag1", "tag2"], + ) + + +def test_metrics_show(dvc, mocker): + cli_args = parse_args( + [ + "plot", + "show", + "-f", + "result.extension", + "-t", + "template", + "datafile", + ] + ) + assert cli_args.func == CmdPlotShow + + cmd = cli_args.func(cli_args) + + m = mocker.patch.object(cmd.repo, "plot", autospec=True) + + assert cmd.run() == 0 + + m.assert_called_once_with( + datafile="datafile", template="template", file="result.extension", + ) From 944f1d2e3fe1e46c8d59c41fd32da73faaa2dcbb Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 10 Apr 2020 11:24:19 +0200 Subject: [PATCH 030/102] some minor fixes for tests --- dvc/plot.py | 5 +++- dvc/repo/plot.py | 3 +++ tests/func/test_plot.py | 52 +++++++++++++++++++++++++++++++---------- 3 files changed, 47 insertions(+), 13 deletions(-) diff --git a/dvc/plot.py b/dvc/plot.py index 988fa94215..0178db8c32 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -42,7 +42,9 @@ def _prepare_div(vega_dict): id = "".join(random.sample(string.ascii_lowercase, 8)) return DIV_HTML.format( id=str(id), - vega_json=json.dumps(vega_dict, indent=4, separators=(",", ": ")), + vega_json=json.dumps( + vega_dict, indent=4, separators=(",", ": "), sort_keys=True + ), ) @@ -128,6 +130,7 @@ def fill(template_path, data, priority_datafile=None, result_path=None): to_dump, indent=Template.INDENT, separators=Template.SEPARATORS, + sort_keys=True, ), ) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 434444f7d0..115faf9969 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -24,6 +24,9 @@ def _load_from_tree(tree, datafile, default_plot=False): elif datafile.endswith(".csv"): data = _parse_csv(datafile, default_plot, tree) + else: + raise DvcException("Could not parse") + return data diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index deaab42531..8462c135de 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -2,6 +2,7 @@ import json import logging import shutil +from collections import OrderedDict import pytest from bs4 import BeautifulSoup @@ -59,6 +60,7 @@ def test_plot_csv_one_column(tmp_dir, scm, dvc): {"y": "2", "x": 0, "rev": "current"}, {"y": "3", "x": 1, "rev": "current"}, ], + sort_keys=True, ) assert _remove_whitespace(vega_data) in _remove_whitespace( first(page_content.body.script.contents) @@ -66,7 +68,10 @@ def test_plot_csv_one_column(tmp_dir, scm, dvc): def test_plot_csv_multiple_columns(tmp_dir, scm, dvc): - metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] + metric = [ + OrderedDict([("first_val", 100), ("val", 2)]), + OrderedDict([("first_val", 200), ("val", 3)]), + ] _write_csv(metric, "metric.csv") _run_with_metric(tmp_dir, metric_filename="metric.csv") @@ -78,6 +83,7 @@ def test_plot_csv_multiple_columns(tmp_dir, scm, dvc): {"y": "2", "x": 1, "rev": "current"}, {"y": "3", "x": 2, "rev": "current"}, ], + sort_keys=True, ) assert _remove_whitespace(vega_data) in _remove_whitespace( first(page_content.body.script.contents) @@ -97,14 +103,22 @@ def test_plot_json_single_val(tmp_dir, scm, dvc): {"y": 2, "x": 0, "rev": "current"}, {"y": 3, "x": 1, "rev": "current"}, ], + sort_keys=True, ) assert _remove_whitespace(vega_data) in _remove_whitespace( first(page_content.body.script.contents) ) +@pytest.mark.skip( + reason="Consider whether we want to support this case, " + "problems with dict keys order for python 3.5" +) def test_plot_json_multiple_val(tmp_dir, scm, dvc): - metric = [{"first_val": 100, "val": 2}, {"first_val": 100, "val": 3}] + metric = [ + OrderedDict([("first_val", 100), ("val", 2)]), + OrderedDict([("first_val", 200), ("val", 3)]), + ] _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "first run") @@ -116,6 +130,7 @@ def test_plot_json_multiple_val(tmp_dir, scm, dvc): {"y": 2, "x": 0, "rev": "current"}, {"y": 3, "x": 1, "rev": "current"}, ], + sort_keys=True, ) assert _remove_whitespace(vega_data) in _remove_whitespace( first(page_content.body.script.contents) @@ -138,6 +153,7 @@ def test_plot_confusion(tmp_dir, dvc): {"predicted": "B", "actual": "A", "rev": "current"}, {"predicted": "A", "actual": "A", "rev": "current"}, ], + sort_keys=True, ) assert _remove_whitespace(vega_data) in _remove_whitespace( first(page_content.body.script.contents) @@ -145,6 +161,10 @@ def test_plot_confusion(tmp_dir, dvc): def test_plot_multiple_revs(tmp_dir, scm, dvc): + shutil.copy( + fspath(tmp_dir / ".dvc" / "plot" / "default.dvct"), "template.dvct" + ) + metric_1 = [{"x": 1, "y": 2}, {"x": 2, "y": 3}] _write_json(tmp_dir, metric_1, "metric.json") _run_with_metric(tmp_dir, "metric.json", "init", "v1") @@ -157,20 +177,24 @@ def test_plot_multiple_revs(tmp_dir, scm, dvc): _write_json(tmp_dir, metric_3, "metric.json") _run_with_metric(tmp_dir, "metric.json", "third") - result = dvc.plot("metric.json", revisions=["HEAD", "v2", "v1"]) + result = dvc.plot( + "metric.json", + template="template.dvct", + revisions=["HEAD", "v2", "v1"], + file="result.html", + ) page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( [ - {"y": 5, "x": 0, "rev": "HEAD"}, - {"y": 6, "x": 1, "rev": "HEAD"}, - {"y": 3, "x": 0, "rev": "v2"}, - {"y": 5, "x": 1, "rev": "v2"}, - {"y": 2, "x": 0, "rev": "v1"}, - {"y": 3, "x": 1, "rev": "v1"}, + {"y": 5, "x": 1, "rev": "HEAD"}, + {"y": 6, "x": 2, "rev": "HEAD"}, + {"y": 3, "x": 1, "rev": "v2"}, + {"y": 5, "x": 2, "rev": "v2"}, + {"y": 2, "x": 1, "rev": "v1"}, + {"y": 3, "x": 2, "rev": "v1"}, ], - indent=4, - separators=(",", ": "), + sort_keys=True, ) assert _remove_whitespace(vega_data) in _remove_whitespace( first(page_content.body.script.contents) @@ -181,7 +205,7 @@ def test_plot_even_if_metric_missing(tmp_dir, scm, dvc, caplog): tmp_dir.scm_gen("some_file", "content", commit="there is no metric") scm.tag("v1") - metric = [{"x": 1, "y": 2}, {"x": 2, "y": 3}] + metric = [{"y": 2}, {"y": 3}] _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "there is metric", "v2") @@ -196,6 +220,7 @@ def test_plot_even_if_metric_missing(tmp_dir, scm, dvc, caplog): page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( [{"y": 2, "x": 0, "rev": "v2"}, {"y": 3, "x": 1, "rev": "v2"}], + sort_keys=True, ) assert _remove_whitespace(vega_data) in _remove_whitespace( first(page_content.body.script.contents) @@ -236,6 +261,7 @@ def test_custom_template(tmp_dir, scm, dvc): {"a": 1, "b": 2, "rev": "current"}, {"a": 2, "b": 3, "rev": "current"}, ], + sort_keys=True, ) assert _remove_whitespace(vega_data) in _remove_whitespace( first(page_content.body.script.contents) @@ -269,6 +295,7 @@ def test_custom_template_with_specified_data(tmp_dir, scm, dvc): {"a": 1, "b": 2, "rev": "current"}, {"a": 2, "b": 3, "rev": "current"}, ], + sort_keys=True, ) assert _remove_whitespace(vega_data) in _remove_whitespace( first(page_content.body.script.contents) @@ -298,6 +325,7 @@ def test_plot_override_specified_data_source(tmp_dir, scm, dvc): {"a": 1, "b": 2, "rev": "current"}, {"a": 2, "b": 3, "rev": "current"}, ], + sort_keys=True, ) assert _remove_whitespace(vega_data) in _remove_whitespace( first(page_content.body.script.contents) From f7ff4c21dee60e7380a2d14ddf9049d7619c5685 Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 10 Apr 2020 12:06:51 +0200 Subject: [PATCH 031/102] plot: unit test loading --- dvc/repo/plot.py | 15 +++++++++++---- tests/func/test_plot.py | 1 - 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 115faf9969..b6ba5e8e6e 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -79,27 +79,34 @@ def _load_from_revision(repo, datafile, revision=None, default_plot=False): def _load_from_revisions(repo, datafile, revisions, default_plot=False): - # TODO test data = [] if len(revisions) == 0: # TODO implement status for file if repo.scm.is_dirty(): data.extend( - _load_from_revision(repo, datafile, "HEAD", default_plot) + _load_from_revision( + repo, datafile, "HEAD", default_plot=default_plot + ) ) data.extend( _load_from_revision(repo, datafile, default_plot=default_plot) ) elif len(revisions) == 1: data.extend( - _load_from_revision(repo, datafile, revisions[0], default_plot) + _load_from_revision( + repo, datafile, revisions[0], default_plot=default_plot + ) ) data.extend( _load_from_revision(repo, datafile, default_plot=default_plot) ) else: for rev in revisions: - data.extend(_load_from_revision(repo, datafile, rev, default_plot)) + data.extend( + _load_from_revision( + repo, datafile, rev, default_plot=default_plot + ) + ) if not data: raise DvcException( diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 8462c135de..cf94bd34b4 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -56,7 +56,6 @@ def test_plot_csv_one_column(tmp_dir, scm, dvc): page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( [ - # TODO csv reads as strings, what to do with that? {"y": "2", "x": 0, "rev": "current"}, {"y": "3", "x": 1, "rev": "current"}, ], From 4dd72ae3607a9bc1812520cbecc6a6f2f88ce12a Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 10 Apr 2020 12:08:25 +0200 Subject: [PATCH 032/102] plot: unit test loading --- tests/unit/repo/test_plot.py | 64 ++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 tests/unit/repo/test_plot.py diff --git a/tests/unit/repo/test_plot.py b/tests/unit/repo/test_plot.py new file mode 100644 index 0000000000..9d518c5609 --- /dev/null +++ b/tests/unit/repo/test_plot.py @@ -0,0 +1,64 @@ +import pytest +from mock import MagicMock, call + +from dvc.exceptions import DvcException +from dvc.repo.plot import _load_from_revisions + + +def test_load_no_revisions_clean(mocker): + m = mocker.patch("dvc.repo.plot._load_from_revision") + repo = MagicMock() + repo.scm.is_dirty.return_value = False + + with pytest.raises(DvcException): + _load_from_revisions(repo, "datafile", [], False) + + assert m.call_count == 1 + assert m.call_args_list[0] == call(repo, "datafile", default_plot=False) + + +def test_load_no_revisions_dirty(mocker): + m = mocker.patch("dvc.repo.plot._load_from_revision") + repo = MagicMock() + repo.scm.is_dirty.return_value = True + + with pytest.raises(DvcException): + _load_from_revisions(repo, "datafile", [], False) + + assert m.call_count == 2 + assert m.call_args_list[0] == call( + repo, "datafile", "HEAD", default_plot=False + ) + assert m.call_args_list[1] == call(repo, "datafile", default_plot=False) + + +def test_load_one(mocker): + m = mocker.patch("dvc.repo.plot._load_from_revision") + repo = MagicMock() + repo.scm.is_dirty.return_value = True + + with pytest.raises(DvcException): + _load_from_revisions(repo, "datafile", ["tag1"], False) + + assert m.call_count == 2 + assert m.call_args_list[0] == call( + repo, "datafile", "tag1", default_plot=False + ) + assert m.call_args_list[1] == call(repo, "datafile", default_plot=False) + + +def test_load_more(mocker): + m = mocker.patch("dvc.repo.plot._load_from_revision") + repo = MagicMock() + repo.scm.is_dirty.return_value = True + + with pytest.raises(DvcException): + _load_from_revisions(repo, "datafile", ["tag1", "tag2"], False) + + assert m.call_count == 2 + assert m.call_args_list[0] == call( + repo, "datafile", "tag1", default_plot=False + ) + assert m.call_args_list[1] == call( + repo, "datafile", "tag2", default_plot=False + ) From df35c4912efae6e801d42985bd368bac3d81c77c Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 10 Apr 2020 12:24:37 +0200 Subject: [PATCH 033/102] plot: handle TODOS --- dvc/repo/__init__.py | 1 - dvc/repo/plot.py | 32 +++++++++++++++++++++----------- tests/func/test_plot.py | 5 ++--- 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index ecb6211966..0165c794ff 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -114,7 +114,6 @@ def __init__(self, root_dir=None): self._ignore() - # TODO is it necessary? self.plot_templates = PlotTemplates(self.dvc_dir) @property diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index b6ba5e8e6e..3781123424 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -12,11 +12,29 @@ logger = logging.getLogger(__name__) +class NoMetricsInHistoryError(DvcException): + def __init__(self, path, revisions): + super().__init__( + "Could not find '{}' on any of the revisions: " + "'{}'".format(path, ", ".join(revisions)) + ) + + +class TooManyDataSourcesError(DvcException): + def __init__(self, datafile, template_datafiles): + super().__init__( + "Unable to reason which of possible data sources: '{}' " + "should be replaced with '{}'".format( + ", ".join(template_datafiles), datafile + ) + ) + + def _all_dict_of_length_one(data): return all([isinstance(e, dict) and len(e) == 1 for e in data]) -# TODO test parsing +# TODO try to use parsing from metric def _load_from_tree(tree, datafile, default_plot=False): if datafile.endswith(".json"): data = _parse_json(datafile, default_plot, tree) @@ -81,7 +99,6 @@ def _load_from_revision(repo, datafile, revision=None, default_plot=False): def _load_from_revisions(repo, datafile, revisions, default_plot=False): data = [] if len(revisions) == 0: - # TODO implement status for file if repo.scm.is_dirty(): data.extend( _load_from_revision( @@ -109,11 +126,7 @@ def _load_from_revisions(repo, datafile, revisions, default_plot=False): ) if not data: - raise DvcException( - "Target metric: '{}' could not be found at any of '{}'".format( - datafile, ", ".join(revisions) - ) - ) + raise NoMetricsInHistoryError(datafile, revisions) return data @@ -130,8 +143,6 @@ def plot(repo, datafile=None, template=None, revisions=None, file=None): template_path = repo.plot_templates.default_template else: template_path = _evaluate_templatepath(repo, template) - # TODO exception - assert template_path.endswith(".dvct") default_plot = ( True @@ -146,8 +157,7 @@ def plot(repo, datafile=None, template=None, revisions=None, file=None): if datafile: if len(template_datafiles) > 1: - # TODO - raise DvcException("Don't know which datafile to replace") + raise TooManyDataSourcesError(datafile, template_datafiles) template_datafiles = {datafile} data = { diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index cf94bd34b4..6fbc210fa8 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -9,8 +9,8 @@ from funcy import first from dvc.compat import fspath -from dvc.exceptions import DvcException from dvc.plot import DefaultLinearTemplate +from dvc.repo.plot import NoMetricsInHistoryError def _remove_whitespace(value): @@ -237,8 +237,7 @@ def test_throw_on_no_metric_at_all(tmp_dir, scm, dvc): ) scm.tag("v2") - # TODO create exception - with pytest.raises(DvcException): + with pytest.raises(NoMetricsInHistoryError): dvc.plot("metric.json", revisions=["v2", "v1"]) From 7e3503f7507ac8a95d5ce8f1704a5d382dd5ba13 Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 10 Apr 2020 12:31:04 +0200 Subject: [PATCH 034/102] cleanup --- tests/func/test_plot.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 6fbc210fa8..d7396bf6fb 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -18,7 +18,6 @@ def _remove_whitespace(value): def _run_with_metric(tmp_dir, metric_filename, commit=None, tag=None): - # tmp_dir.gen({metric_filename: json.dumps(metric)}) tmp_dir.dvc.run(metrics_no_cache=[metric_filename]) if hasattr(tmp_dir.dvc, "scm"): tmp_dir.dvc.scm.add([metric_filename, metric_filename + ".dvc"]) From 469bf780935b45c619037ce50a7dc125d1a0bf54 Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 10 Apr 2020 13:58:40 +0200 Subject: [PATCH 035/102] use mocker --- tests/unit/repo/test_plot.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/tests/unit/repo/test_plot.py b/tests/unit/repo/test_plot.py index 9d518c5609..3e5bb395a1 100644 --- a/tests/unit/repo/test_plot.py +++ b/tests/unit/repo/test_plot.py @@ -1,5 +1,4 @@ import pytest -from mock import MagicMock, call from dvc.exceptions import DvcException from dvc.repo.plot import _load_from_revisions @@ -7,58 +6,64 @@ def test_load_no_revisions_clean(mocker): m = mocker.patch("dvc.repo.plot._load_from_revision") - repo = MagicMock() + repo = mocker.MagicMock() repo.scm.is_dirty.return_value = False with pytest.raises(DvcException): _load_from_revisions(repo, "datafile", [], False) assert m.call_count == 1 - assert m.call_args_list[0] == call(repo, "datafile", default_plot=False) + assert m.call_args_list[0] == mocker.call( + repo, "datafile", default_plot=False + ) def test_load_no_revisions_dirty(mocker): m = mocker.patch("dvc.repo.plot._load_from_revision") - repo = MagicMock() + repo = mocker.MagicMock() repo.scm.is_dirty.return_value = True with pytest.raises(DvcException): _load_from_revisions(repo, "datafile", [], False) assert m.call_count == 2 - assert m.call_args_list[0] == call( + assert m.call_args_list[0] == mocker.call( repo, "datafile", "HEAD", default_plot=False ) - assert m.call_args_list[1] == call(repo, "datafile", default_plot=False) + assert m.call_args_list[1] == mocker.call( + repo, "datafile", default_plot=False + ) def test_load_one(mocker): m = mocker.patch("dvc.repo.plot._load_from_revision") - repo = MagicMock() + repo = mocker.MagicMock() repo.scm.is_dirty.return_value = True with pytest.raises(DvcException): _load_from_revisions(repo, "datafile", ["tag1"], False) assert m.call_count == 2 - assert m.call_args_list[0] == call( + assert m.call_args_list[0] == mocker.call( repo, "datafile", "tag1", default_plot=False ) - assert m.call_args_list[1] == call(repo, "datafile", default_plot=False) + assert m.call_args_list[1] == mocker.call( + repo, "datafile", default_plot=False + ) def test_load_more(mocker): m = mocker.patch("dvc.repo.plot._load_from_revision") - repo = MagicMock() + repo = mocker.MagicMock() repo.scm.is_dirty.return_value = True with pytest.raises(DvcException): _load_from_revisions(repo, "datafile", ["tag1", "tag2"], False) assert m.call_count == 2 - assert m.call_args_list[0] == call( + assert m.call_args_list[0] == mocker.call( repo, "datafile", "tag1", default_plot=False ) - assert m.call_args_list[1] == call( + assert m.call_args_list[1] == mocker.call( repo, "datafile", "tag2", default_plot=False ) From ee3879cd3cd63de6d9963d46785f39d6c2bb2ed6 Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 10 Apr 2020 14:16:51 +0200 Subject: [PATCH 036/102] plot: support tsv --- dvc/repo/plot.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 3781123424..14f6aa9f4d 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -41,18 +41,21 @@ def _load_from_tree(tree, datafile, default_plot=False): elif datafile.endswith(".csv"): data = _parse_csv(datafile, default_plot, tree) - + elif datafile.endswith(".tsv"): + data = _parse_csv(datafile, default_plot, tree, "\t") else: - raise DvcException("Could not parse") + raise DvcException( + "Could not deduct file type from file: '{}'".format(datafile) + ) return data -def _parse_csv(datafile, default_plot, tree): +def _parse_csv(datafile, default_plot, tree, delimiter=","): with tree.open(datafile, "r") as fobj: if default_plot: data = [] - for index, row in enumerate(csv.reader(fobj)): + for index, row in enumerate(csv.reader(fobj, delimiter=delimiter)): assert len(row) >= 1 if index == 0 and len(row) > 1: # skip header @@ -60,7 +63,12 @@ def _parse_csv(datafile, default_plot, tree): data.append({"y": row[-1], "x": index}) else: data = [ - row for row in (csv.DictReader(fobj, skipinitialspace=True)) + row + for row in ( + csv.DictReader( + fobj, skipinitialspace=True, delimiter=delimiter + ) + ) ] return data From 0317b31e7850233776bf2dcc068b92124bf87d33 Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 10 Apr 2020 14:39:55 +0200 Subject: [PATCH 037/102] plot: command refactoring --- dvc/command/plot.py | 30 +++++++++++++++--------------- dvc/repo/plot.py | 1 - 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index ee296d1933..68efc6ceb5 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -41,7 +41,10 @@ def run(self): def add_parser(subparsers, parent_parser): - PLOT_HELP = "For visualisation" + PLOT_HELP = ( + "For visualisation of metrics stored in structured files (" + "json, csv, tsv)." + ) plot_parser = subparsers.add_parser( "plot", @@ -67,21 +70,18 @@ def add_parser(subparsers, parent_parser): help=SHOW_HELP, formatter_class=argparse.RawDescriptionHelpFormatter, ) - plot_show_parser.add_argument( - "-f", "--file", help="Specify name of the " "file it generates." - ) plot_show_parser.add_argument( "-t", "--template", nargs="?", default=None, - help="dvct file to " "visualize.", + help="File to be injected with data.", ) plot_show_parser.add_argument( - "datafile", - nargs="?", - default=None, - help="Vega template file " "used to visualize " "data from datafile", + "datafile", nargs="?", default=None, help="Data to be visualized." + ) + plot_show_parser.add_argument( + "-f", "--file", help="Specify name of the file it generates." ) plot_show_parser.set_defaults(func=CmdPlotShow) @@ -94,27 +94,27 @@ def add_parser(subparsers, parent_parser): help=PLOT_DIFF_HELP, formatter_class=argparse.RawDescriptionHelpFormatter, ) - plot_diff_parser.add_argument( - "-f", "--file", help="Specify name of the " "file it generates." - ) plot_diff_parser.add_argument( "-t", "--template", nargs="?", default=None, - help=("dvct template file to " "process."), + help=("File to be injected wit data."), ) plot_diff_parser.add_argument( "-d", "--datafile", nargs="?", default=None, - help="Vega template file " "used to visualize " "data from datafile", + help="Data to be visualized.", + ) + plot_diff_parser.add_argument( + "-f", "--file", help="Specify name of the file it generates." ) plot_diff_parser.add_argument( "revisions", nargs="*", default=None, - help=("Git revisions to plot from"), + help="Git revisions to plot from", ) plot_diff_parser.set_defaults(func=CmdPlotDiff) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 14f6aa9f4d..50ac7cca2e 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -34,7 +34,6 @@ def _all_dict_of_length_one(data): return all([isinstance(e, dict) and len(e) == 1 for e in data]) -# TODO try to use parsing from metric def _load_from_tree(tree, datafile, default_plot=False): if datafile.endswith(".json"): data = _parse_json(datafile, default_plot, tree) From 117c2fc5d76a283337a0977876a800c5cccb4abc Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 10 Apr 2020 15:37:45 +0200 Subject: [PATCH 038/102] plot: fix windows issues with tests --- tests/func/test_plot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index d7396bf6fb..dd2fa49b39 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -28,7 +28,7 @@ def _run_with_metric(tmp_dir, metric_filename, commit=None, tag=None): def _write_csv(metric, filename): - with open(filename, "w") as csvobj: + with open(filename, "w", newline="") as csvobj: if all([len(e) > 1 for e in metric]): writer = csv.DictWriter( csvobj, fieldnames=list(first(metric).keys()) From b3b3a3a18208628d394ce9a645e9b9c14b26a193 Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 10 Apr 2020 15:50:02 +0200 Subject: [PATCH 039/102] plot: test: some more windows fixes --- dvc/plot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/plot.py b/dvc/plot.py index 0178db8c32..97fde1b3fb 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -190,7 +190,7 @@ def get_template(self, path): if os.path.exists(t_path): return t_path else: - regex = re.compile(t_path + ".*") + regex = re.compile(re.escape(t_path) + ".*") for root, d, fs in os.walk(self.templates_dir): for f in fs: path = os.path.join(root, f) From d53a187118367963ceeb4955ae8adde81deda95e Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 10 Apr 2020 16:12:38 +0200 Subject: [PATCH 040/102] plot: _load_from_revisions complexity fix --- dvc/repo/plot.py | 29 ++++++++--------------------- tests/unit/repo/test_plot.py | 8 ++++---- 2 files changed, 12 insertions(+), 25 deletions(-) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 50ac7cca2e..0395231c67 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -105,32 +105,19 @@ def _load_from_revision(repo, datafile, revision=None, default_plot=False): def _load_from_revisions(repo, datafile, revisions, default_plot=False): data = [] - if len(revisions) == 0: - if repo.scm.is_dirty(): - data.extend( - _load_from_revision( - repo, datafile, "HEAD", default_plot=default_plot - ) - ) + + if len(revisions) <= 1: data.extend( _load_from_revision(repo, datafile, default_plot=default_plot) ) - elif len(revisions) == 1: - data.extend( - _load_from_revision( - repo, datafile, revisions[0], default_plot=default_plot - ) - ) + + if len(revisions) == 0 and repo.scm.is_dirty(): + revisions.append("HEAD") + + for rev in revisions: data.extend( - _load_from_revision(repo, datafile, default_plot=default_plot) + _load_from_revision(repo, datafile, rev, default_plot=default_plot) ) - else: - for rev in revisions: - data.extend( - _load_from_revision( - repo, datafile, rev, default_plot=default_plot - ) - ) if not data: raise NoMetricsInHistoryError(datafile, revisions) diff --git a/tests/unit/repo/test_plot.py b/tests/unit/repo/test_plot.py index 3e5bb395a1..bd31fd1684 100644 --- a/tests/unit/repo/test_plot.py +++ b/tests/unit/repo/test_plot.py @@ -27,10 +27,10 @@ def test_load_no_revisions_dirty(mocker): _load_from_revisions(repo, "datafile", [], False) assert m.call_count == 2 - assert m.call_args_list[0] == mocker.call( + assert m.call_args_list[1] == mocker.call( repo, "datafile", "HEAD", default_plot=False ) - assert m.call_args_list[1] == mocker.call( + assert m.call_args_list[0] == mocker.call( repo, "datafile", default_plot=False ) @@ -45,10 +45,10 @@ def test_load_one(mocker): assert m.call_count == 2 assert m.call_args_list[0] == mocker.call( - repo, "datafile", "tag1", default_plot=False + repo, "datafile", default_plot=False ) assert m.call_args_list[1] == mocker.call( - repo, "datafile", default_plot=False + repo, "datafile", "tag1", default_plot=False ) From 47ee620ddfab14544eb0b0a216ee1e241dbf27cc Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 10 Apr 2020 16:18:29 +0200 Subject: [PATCH 041/102] plot: reduce complexity --- dvc/repo/plot.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 0395231c67..276c3a033d 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -133,6 +133,7 @@ def _evaluate_templatepath(repo, template): @locked def plot(repo, datafile=None, template=None, revisions=None, file=None): + if template is None: template_path = repo.plot_templates.default_template else: @@ -147,13 +148,7 @@ def plot(repo, datafile=None, template=None, revisions=None, file=None): if revisions is None: revisions = [] - template_datafiles = Template.parse_data_placeholders(template_path) - - if datafile: - if len(template_datafiles) > 1: - raise TooManyDataSourcesError(datafile, template_datafiles) - template_datafiles = {datafile} - + template_datafiles = _parse_template(template_path, datafile) data = { datafile: _load_from_revisions(repo, datafile, revisions, default_plot) for datafile in template_datafiles @@ -164,3 +159,12 @@ def plot(repo, datafile=None, template=None, revisions=None, file=None): ) logger.info("file://{}".format(os.path.join(repo.root_dir, result_path))) return result_path + + +def _parse_template(template_path, datafile): + template_datafiles = Template.parse_data_placeholders(template_path) + if datafile: + if len(template_datafiles) > 1: + raise TooManyDataSourcesError(datafile, template_datafiles) + template_datafiles = {datafile} + return template_datafiles From 66c70910982c6a2e235186958def883f36ff6ffd Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 10 Apr 2020 16:25:36 +0200 Subject: [PATCH 042/102] plot: complexity reduction --- dvc/plot.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/dvc/plot.py b/dvc/plot.py index 97fde1b3fb..a8bad00463 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -115,9 +115,17 @@ def fill(template_path, data, priority_datafile=None, result_path=None): if not result_path: result_path = os.path.basename(template_path) + ".html" + result_content = Template._fill(template_path, data, priority_datafile) + + with open(result_path, "w") as fobj: + fobj.write(result_content) + + return result_path + + @staticmethod + def _fill(template_path, data, priority_datafile): with open(template_path, "r") as fobj: result_content = fobj.read() - for placeholder in Template.get_data_placeholders(template_path): file = Template.get_datafile(placeholder) if not file or priority_datafile: @@ -133,10 +141,7 @@ def fill(template_path, data, priority_datafile=None, result_path=None): sort_keys=True, ), ) - - with open(result_path, "w") as fobj: - fobj.write(result_content) - return result_path + return result_content class DefaultLinearTemplate(Template): From 4d1c20f094a31335da96722e914756bd97601d7c Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 10 Apr 2020 16:31:42 +0200 Subject: [PATCH 043/102] plot: deepsource suggestions --- dvc/plot.py | 18 +++++++++--------- dvc/repo/plot.py | 9 ++------- tests/unit/command/test_plot.py | 3 ++- 3 files changed, 13 insertions(+), 17 deletions(-) diff --git a/dvc/plot.py b/dvc/plot.py index a8bad00463..b71820f641 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -39,9 +39,9 @@ def _save_plot_html(divs, path): def _prepare_div(vega_dict): - id = "".join(random.sample(string.ascii_lowercase, 8)) + div_id = "".join(random.sample(string.ascii_lowercase, 8)) return DIV_HTML.format( - id=str(id), + id=str(div_id), vega_json=json.dumps( vega_dict, indent=4, separators=(",", ": "), sort_keys=True ), @@ -194,13 +194,13 @@ def get_template(self, path): t_path = os.path.join(self.templates_dir, path) if os.path.exists(t_path): return t_path - else: - regex = re.compile(re.escape(t_path) + ".*") - for root, d, fs in os.walk(self.templates_dir): - for f in fs: - path = os.path.join(root, f) - if regex.findall(path): - return path + + regex = re.compile(re.escape(t_path) + ".*") + for root, d, fs in os.walk(self.templates_dir): + for f in fs: + path = os.path.join(root, f) + if regex.findall(path): + return path raise DvcException("Template not found") diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 276c3a033d..d67f3e00c4 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -127,8 +127,7 @@ def _load_from_revisions(repo, datafile, revisions, default_plot=False): def _evaluate_templatepath(repo, template): if os.path.exists(template): return template - else: - return repo.plot_templates.get_template(template) + return repo.plot_templates.get_template(template) @locked @@ -139,11 +138,7 @@ def plot(repo, datafile=None, template=None, revisions=None, file=None): else: template_path = _evaluate_templatepath(repo, template) - default_plot = ( - True - if template_path == repo.plot_templates.default_template - else False - ) + default_plot = template_path == repo.plot_templates.default_template if revisions is None: revisions = [] diff --git a/tests/unit/command/test_plot.py b/tests/unit/command/test_plot.py index 1765b91edd..3303bbee76 100644 --- a/tests/unit/command/test_plot.py +++ b/tests/unit/command/test_plot.py @@ -11,7 +11,8 @@ def test_metrics_diff(dvc, mocker): "result.extension", "-t", "template", - "-d" "datafile", + "-d", + "datafile", "HEAD", "tag1", "tag2", From 22041d3fd8297157f8160c64921db57e430c9140 Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 10 Apr 2020 16:34:18 +0200 Subject: [PATCH 044/102] plot: move template path evaluation --- dvc/repo/plot.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index d67f3e00c4..e9e05432bd 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -124,7 +124,10 @@ def _load_from_revisions(repo, datafile, revisions, default_plot=False): return data -def _evaluate_templatepath(repo, template): +def _evaluate_templatepath(repo, template=None): + if not template: + return repo.plot_templates.default_template + if os.path.exists(template): return template return repo.plot_templates.get_template(template) @@ -133,10 +136,7 @@ def _evaluate_templatepath(repo, template): @locked def plot(repo, datafile=None, template=None, revisions=None, file=None): - if template is None: - template_path = repo.plot_templates.default_template - else: - template_path = _evaluate_templatepath(repo, template) + template_path = _evaluate_templatepath(repo, template) default_plot = template_path == repo.plot_templates.default_template From f9709c6cead5fb90291496d0d8f813ffd2bad17b Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 10 Apr 2020 16:38:04 +0200 Subject: [PATCH 045/102] fixup --- dvc/repo/plot.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index e9e05432bd..32279067c6 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -135,14 +135,13 @@ def _evaluate_templatepath(repo, template=None): @locked def plot(repo, datafile=None, template=None, revisions=None, file=None): + if revisions is None: + revisions = [] template_path = _evaluate_templatepath(repo, template) default_plot = template_path == repo.plot_templates.default_template - if revisions is None: - revisions = [] - template_datafiles = _parse_template(template_path, datafile) data = { datafile: _load_from_revisions(repo, datafile, revisions, default_plot) From 02aee4ec2f41e995137a371ea6dd61201ceb0657 Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 10 Apr 2020 16:42:57 +0200 Subject: [PATCH 046/102] fixup --- dvc/repo/plot.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 32279067c6..5f9b745b25 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -30,10 +30,6 @@ def __init__(self, datafile, template_datafiles): ) -def _all_dict_of_length_one(data): - return all([isinstance(e, dict) and len(e) == 1 for e in data]) - - def _load_from_tree(tree, datafile, default_plot=False): if datafile.endswith(".json"): data = _parse_json(datafile, default_plot, tree) From 119935f44358fc9bd7fc414a13c6581b0090f473 Mon Sep 17 00:00:00 2001 From: pawel Date: Mon, 13 Apr 2020 10:39:15 +0200 Subject: [PATCH 047/102] exception on no datafile and no template --- dvc/repo/plot.py | 10 ++++++++++ tests/func/test_plot.py | 7 ++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 5f9b745b25..ecc7a376a1 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -30,6 +30,13 @@ def __init__(self, datafile, template_datafiles): ) +class NoDataNorTemplateProvided(DvcException): + def __init__(self): + super().__init__( + "Cannot plot if datafile or template is not provided." + ) + + def _load_from_tree(tree, datafile, default_plot=False): if datafile.endswith(".json"): data = _parse_json(datafile, default_plot, tree) @@ -134,6 +141,9 @@ def plot(repo, datafile=None, template=None, revisions=None, file=None): if revisions is None: revisions = [] + if not datafile and not template: + raise NoDataNorTemplateProvided() + template_path = _evaluate_templatepath(repo, template) default_plot = template_path == repo.plot_templates.default_template diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index dd2fa49b39..d5dfee57ac 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -10,7 +10,7 @@ from dvc.compat import fspath from dvc.plot import DefaultLinearTemplate -from dvc.repo.plot import NoMetricsInHistoryError +from dvc.repo.plot import NoMetricsInHistoryError, NoDataNorTemplateProvided def _remove_whitespace(value): @@ -348,3 +348,8 @@ def test_should_embed_vega_json_template(tmp_dir, scm, dvc): ] assert vega_data == result_content["data"]["values"] + + +def test_should_raise_on_no_template_and_datafile(tmp_dir, dvc): + with pytest.raises(NoDataNorTemplateProvided): + dvc.plot() From 9c3548116bf06be1da88e28f70ef4d3ac9f9177c Mon Sep 17 00:00:00 2001 From: pawel Date: Mon, 13 Apr 2020 11:47:58 +0200 Subject: [PATCH 048/102] json metric load with OrderedDict --- dvc/command/plot.py | 2 +- dvc/repo/plot.py | 3 ++- tests/func/test_plot.py | 10 +++------- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 68efc6ceb5..e3a39be17a 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -99,7 +99,7 @@ def add_parser(subparsers, parent_parser): "--template", nargs="?", default=None, - help=("File to be injected wit data."), + help=("File to be injected with data."), ) plot_diff_parser.add_argument( "-d", diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index ecc7a376a1..ad5640c295 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -2,6 +2,7 @@ import json import logging import os +from collections import OrderedDict from funcy import first @@ -77,7 +78,7 @@ def _parse_csv(datafile, default_plot, tree, delimiter=","): def _parse_json(datafile, default_plot, tree): with tree.open(datafile, "r") as fobj: - data = json.load(fobj) + data = json.load(fobj, object_pairs_hook=OrderedDict) assert isinstance(data, list) if default_plot: assert all(len(e) >= 1 for e in data) diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index d5dfee57ac..635d694687 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -43,7 +43,7 @@ def _write_csv(metric, filename): def _write_json(tmp_dir, metric, filename): - tmp_dir.gen(filename, json.dumps(metric)) + tmp_dir.gen(filename, json.dumps(metric, sort_keys=True)) def test_plot_csv_one_column(tmp_dir, scm, dvc): @@ -108,14 +108,10 @@ def test_plot_json_single_val(tmp_dir, scm, dvc): ) -@pytest.mark.skip( - reason="Consider whether we want to support this case, " - "problems with dict keys order for python 3.5" -) def test_plot_json_multiple_val(tmp_dir, scm, dvc): metric = [ - OrderedDict([("first_val", 100), ("val", 2)]), - OrderedDict([("first_val", 200), ("val", 3)]), + {"first_val": 100, "val": 2}, + {"first_val": 200, "val": 3}, ] _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "first run") From 7d351f8c1ccadf5594fbdcb82909f747e7100fc1 Mon Sep 17 00:00:00 2001 From: pawel Date: Mon, 13 Apr 2020 14:11:02 +0200 Subject: [PATCH 049/102] plot: improve handling non-existing files on revisions --- dvc/repo/plot.py | 54 +++++++++++++++++++++++------------- tests/func/test_plot.py | 26 ++++++++++------- tests/unit/repo/test_plot.py | 14 +++++----- 3 files changed, 58 insertions(+), 36 deletions(-) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index ad5640c295..1d9b37f77e 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -13,7 +13,7 @@ logger = logging.getLogger(__name__) -class NoMetricsInHistoryError(DvcException): +class NoMetricInHistoryError(DvcException): def __init__(self, path, revisions): super().__init__( "Could not find '{}' on any of the revisions: " @@ -21,6 +21,15 @@ def __init__(self, path, revisions): ) +class NoMetricOnRevisionError(DvcException): + def __init__(self, path, revision): + self.path = path + self.revision = revision + super().__init__( + "Could not find '{}' on revision: " "'{}'".format(path, revision) + ) + + class TooManyDataSourcesError(DvcException): def __init__(self, datafile, template_datafiles): super().__init__( @@ -38,6 +47,9 @@ def __init__(self): ) +WORKSPACE_REVISION_NAME = "current" + + def _load_from_tree(tree, datafile, default_plot=False): if datafile.endswith(".json"): data = _parse_json(datafile, default_plot, tree) @@ -87,9 +99,8 @@ def _parse_json(datafile, default_plot, tree): return data -def _load_from_revision(repo, datafile, revision=None, default_plot=False): - if revision is None: - revision = "current" +def _load_from_revision(repo, datafile, revision, default_plot=False): + if revision is WORKSPACE_REVISION_NAME: tree = repo.tree else: tree = repo.scm.get_tree(revision) @@ -99,32 +110,37 @@ def _load_from_revision(repo, datafile, revision=None, default_plot=False): for d in data: d["rev"] = revision except FileNotFoundError: - logger.warning( - "File '{}' was not found at: '{}'. It will not be " - "plotted.".format(datafile, revision) - ) - data = [] + raise NoMetricOnRevisionError(datafile, revision) return data def _load_from_revisions(repo, datafile, revisions, default_plot=False): data = [] + exceptions = [] if len(revisions) <= 1: - data.extend( - _load_from_revision(repo, datafile, default_plot=default_plot) - ) - if len(revisions) == 0 and repo.scm.is_dirty(): revisions.append("HEAD") + revisions.append(WORKSPACE_REVISION_NAME) for rev in revisions: - data.extend( - _load_from_revision(repo, datafile, rev, default_plot=default_plot) - ) - - if not data: - raise NoMetricsInHistoryError(datafile, revisions) + try: + data.extend( + _load_from_revision( + repo, datafile, rev, default_plot=default_plot + ) + ) + except NoMetricOnRevisionError as e: + exceptions.append(e) + + if not data and exceptions: + raise NoMetricInHistoryError(datafile, revisions) + elif exceptions: + for e in exceptions: + logger.warning( + "File '{}' was not found at: '{}'. It will not be " + "plotted.".format(e.path, e.revision) + ) return data diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 635d694687..52e65293f8 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -10,7 +10,7 @@ from dvc.compat import fspath from dvc.plot import DefaultLinearTemplate -from dvc.repo.plot import NoMetricsInHistoryError, NoDataNorTemplateProvided +from dvc.repo.plot import NoMetricInHistoryError, NoDataNorTemplateProvided def _remove_whitespace(value): @@ -221,19 +221,25 @@ def test_plot_even_if_metric_missing(tmp_dir, scm, dvc, caplog): ) -def test_throw_on_no_metric_at_all(tmp_dir, scm, dvc): +def test_throw_on_no_metric_at_all(tmp_dir, scm, dvc, caplog): tmp_dir.scm_gen("some_file", "content", commit="there is no metric") scm.tag("v1") - tmp_dir.scm_gen( - "some_other_file", - "other content", - commit="there is no metric as well", - ) - scm.tag("v2") + tmp_dir.gen("some_file", "make repo dirty") + + caplog.clear() + with pytest.raises(NoMetricInHistoryError) as error, caplog.at_level( + logging.WARNING, "dvc" + ): + dvc.plot("metric.json", revisions=["v1"]) - with pytest.raises(NoMetricsInHistoryError): - dvc.plot("metric.json", revisions=["v2", "v1"]) + # do not warn if none found + assert len(caplog.messages) == 0 + + assert ( + "Could not find 'metric.json' on any of the revisions: 'v1, current'" + == str(error.value) + ) def test_custom_template(tmp_dir, scm, dvc): diff --git a/tests/unit/repo/test_plot.py b/tests/unit/repo/test_plot.py index bd31fd1684..a9117cd0e9 100644 --- a/tests/unit/repo/test_plot.py +++ b/tests/unit/repo/test_plot.py @@ -1,7 +1,7 @@ import pytest from dvc.exceptions import DvcException -from dvc.repo.plot import _load_from_revisions +from dvc.repo.plot import _load_from_revisions, WORKSPACE_REVISION_NAME def test_load_no_revisions_clean(mocker): @@ -14,7 +14,7 @@ def test_load_no_revisions_clean(mocker): assert m.call_count == 1 assert m.call_args_list[0] == mocker.call( - repo, "datafile", default_plot=False + repo, "datafile", WORKSPACE_REVISION_NAME, default_plot=False ) @@ -27,11 +27,11 @@ def test_load_no_revisions_dirty(mocker): _load_from_revisions(repo, "datafile", [], False) assert m.call_count == 2 - assert m.call_args_list[1] == mocker.call( + assert m.call_args_list[0] == mocker.call( repo, "datafile", "HEAD", default_plot=False ) - assert m.call_args_list[0] == mocker.call( - repo, "datafile", default_plot=False + assert m.call_args_list[1] == mocker.call( + repo, "datafile", WORKSPACE_REVISION_NAME, default_plot=False ) @@ -45,10 +45,10 @@ def test_load_one(mocker): assert m.call_count == 2 assert m.call_args_list[0] == mocker.call( - repo, "datafile", default_plot=False + repo, "datafile", "tag1", default_plot=False ) assert m.call_args_list[1] == mocker.call( - repo, "datafile", "tag1", default_plot=False + repo, "datafile", WORKSPACE_REVISION_NAME, default_plot=False ) From 972b61f3a57f6878016045dfa8b8922e078287bc Mon Sep 17 00:00:00 2001 From: pawel Date: Mon, 13 Apr 2020 14:24:21 +0200 Subject: [PATCH 050/102] plot: improve handling non-existing files on revisions --- tests/unit/repo/test_plot.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/tests/unit/repo/test_plot.py b/tests/unit/repo/test_plot.py index a9117cd0e9..2c64412a87 100644 --- a/tests/unit/repo/test_plot.py +++ b/tests/unit/repo/test_plot.py @@ -1,6 +1,3 @@ -import pytest - -from dvc.exceptions import DvcException from dvc.repo.plot import _load_from_revisions, WORKSPACE_REVISION_NAME @@ -9,8 +6,7 @@ def test_load_no_revisions_clean(mocker): repo = mocker.MagicMock() repo.scm.is_dirty.return_value = False - with pytest.raises(DvcException): - _load_from_revisions(repo, "datafile", [], False) + _load_from_revisions(repo, "datafile", [], False) assert m.call_count == 1 assert m.call_args_list[0] == mocker.call( @@ -23,8 +19,7 @@ def test_load_no_revisions_dirty(mocker): repo = mocker.MagicMock() repo.scm.is_dirty.return_value = True - with pytest.raises(DvcException): - _load_from_revisions(repo, "datafile", [], False) + _load_from_revisions(repo, "datafile", [], False) assert m.call_count == 2 assert m.call_args_list[0] == mocker.call( @@ -40,8 +35,7 @@ def test_load_one(mocker): repo = mocker.MagicMock() repo.scm.is_dirty.return_value = True - with pytest.raises(DvcException): - _load_from_revisions(repo, "datafile", ["tag1"], False) + _load_from_revisions(repo, "datafile", ["tag1"], False) assert m.call_count == 2 assert m.call_args_list[0] == mocker.call( @@ -57,8 +51,7 @@ def test_load_more(mocker): repo = mocker.MagicMock() repo.scm.is_dirty.return_value = True - with pytest.raises(DvcException): - _load_from_revisions(repo, "datafile", ["tag1", "tag2"], False) + _load_from_revisions(repo, "datafile", ["tag1", "tag2"], False) assert m.call_count == 2 assert m.call_args_list[0] == mocker.call( From 766bf57891929882d24f07e469b1c285f8614a6f Mon Sep 17 00:00:00 2001 From: pawel Date: Mon, 13 Apr 2020 14:40:54 +0200 Subject: [PATCH 051/102] change default plot path --- dvc/plot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/plot.py b/dvc/plot.py index b71820f641..e630456021 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -113,7 +113,7 @@ def get_datafile(placeholder_string): @staticmethod def fill(template_path, data, priority_datafile=None, result_path=None): if not result_path: - result_path = os.path.basename(template_path) + ".html" + result_path = os.path.join(os.getcwd(), "plot.html") result_content = Template._fill(template_path, data, priority_datafile) From 29dfeaf8a0427d1dfe83d1e0206d1d13fca6293d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Tue, 14 Apr 2020 15:31:53 +0200 Subject: [PATCH 052/102] some exceptions and fixes --- dvc/command/plot.py | 4 +- dvc/plot.py | 43 +++++++++++++------ dvc/repo/plot.py | 15 ++++--- tests/func/test_plot.py | 95 +++++++++++++++++++++++++---------------- 4 files changed, 102 insertions(+), 55 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index e3a39be17a..5b577cf552 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -18,7 +18,7 @@ def run(self): ) except DvcException: - logger.exception("failed to plot metrics") + logger.exception("plot show:") return 1 return 0 @@ -34,7 +34,7 @@ def run(self): ) except DvcException: - logger.exception("failed to plot metrics diff") + logger.exception("plot diff:") return 1 return 0 diff --git a/dvc/plot.py b/dvc/plot.py index e630456021..f98cbf4ba7 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -13,6 +13,17 @@ logger = logging.getLogger(__name__) + +class TemplateNotFound(DvcException): + def __init__(self, path): + super().__init__("Template: '{}' not found.".format(path)) + + +class NoDataForTemplateError(DvcException): + def __init__(self, template_path): + super().__init__("No data provided for '{}'.".format(template_path)) + + PAGE_HTML = """ dvc plot @@ -51,7 +62,7 @@ def _prepare_div(vega_dict): class Template: INDENT = 4 SEPARATORS = (",", ": ") - EXTENTION = ".dvct" + EXTENTION = ".vt" METRIC_DATA_STRING = "" def __init__(self, templates_dir): @@ -113,7 +124,9 @@ def get_datafile(placeholder_string): @staticmethod def fill(template_path, data, priority_datafile=None, result_path=None): if not result_path: - result_path = os.path.join(os.getcwd(), "plot.html") + filename = os.path.basename(template_path) + filename.replace(Template.EXTENTION, ".html") + result_path = os.path.join(os.getcwd(), filename) result_content = Template._fill(template_path, data, priority_datafile) @@ -126,16 +139,22 @@ def fill(template_path, data, priority_datafile=None, result_path=None): def _fill(template_path, data, priority_datafile): with open(template_path, "r") as fobj: result_content = fobj.read() + for placeholder in Template.get_data_placeholders(template_path): file = Template.get_datafile(placeholder) + if not file or priority_datafile: - to_dump = data[priority_datafile] + key = priority_datafile else: - to_dump = data[file] + key = file + + if not key: + raise NoDataForTemplateError(template_path) + result_content = result_content.replace( placeholder, json.dumps( - to_dump, + data[key], indent=Template.INDENT, separators=Template.SEPARATORS, sort_keys=True, @@ -188,7 +207,7 @@ def templates_dir(self): @cached_property def default_template(self): - return os.path.join(self.templates_dir, "default.dvct") + return os.path.join(self.templates_dir, "default.vt") def get_template(self, path): t_path = os.path.join(self.templates_dir, path) @@ -196,13 +215,13 @@ def get_template(self, path): return t_path regex = re.compile(re.escape(t_path) + ".*") - for root, d, fs in os.walk(self.templates_dir): - for f in fs: - path = os.path.join(root, f) - if regex.findall(path): - return path + for root, _, files in os.walk(self.templates_dir): + for file in files: + full_file = os.path.join(root, file) + if regex.findall(full_file): + return full_file - raise DvcException("Template not found") + raise TemplateNotFound(path) def __init__(self, dvc_dir): self.dvc_dir = dvc_dir diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 1d9b37f77e..b7c9fe4672 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -42,12 +42,19 @@ def __init__(self, datafile, template_datafiles): class NoDataNorTemplateProvided(DvcException): def __init__(self): + super().__init__("Datafile or template is not specified.") + + +# TODO support yaml +class PlotMetricTypeError(DvcException): + def __init__(self, path): super().__init__( - "Cannot plot if datafile or template is not provided." + "'{}' - file type error\n" + "Only json, yaml, csv and tsv types are supported.".format(path) ) -WORKSPACE_REVISION_NAME = "current" +WORKSPACE_REVISION_NAME = "workspace" def _load_from_tree(tree, datafile, default_plot=False): @@ -59,9 +66,7 @@ def _load_from_tree(tree, datafile, default_plot=False): elif datafile.endswith(".tsv"): data = _parse_csv(datafile, default_plot, tree, "\t") else: - raise DvcException( - "Could not deduct file type from file: '{}'".format(datafile) - ) + raise PlotMetricTypeError(datafile) return data diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 52e65293f8..468dc1c149 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -9,8 +9,16 @@ from funcy import first from dvc.compat import fspath -from dvc.plot import DefaultLinearTemplate -from dvc.repo.plot import NoMetricInHistoryError, NoDataNorTemplateProvided +from dvc.plot import ( + DefaultLinearTemplate, + TemplateNotFound, + NoDataForTemplateError, +) +from dvc.repo.plot import ( + NoMetricInHistoryError, + NoDataNorTemplateProvided, + PlotMetricTypeError, +) def _remove_whitespace(value): @@ -55,8 +63,8 @@ def test_plot_csv_one_column(tmp_dir, scm, dvc): page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( [ - {"y": "2", "x": 0, "rev": "current"}, - {"y": "3", "x": 1, "rev": "current"}, + {"y": "2", "x": 0, "rev": "workspace"}, + {"y": "3", "x": 1, "rev": "workspace"}, ], sort_keys=True, ) @@ -78,8 +86,8 @@ def test_plot_csv_multiple_columns(tmp_dir, scm, dvc): vega_data = json.dumps( [ # header was skipped so index starts at 1 - {"y": "2", "x": 1, "rev": "current"}, - {"y": "3", "x": 2, "rev": "current"}, + {"y": "2", "x": 1, "rev": "workspace"}, + {"y": "3", "x": 2, "rev": "workspace"}, ], sort_keys=True, ) @@ -98,8 +106,8 @@ def test_plot_json_single_val(tmp_dir, scm, dvc): page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( [ - {"y": 2, "x": 0, "rev": "current"}, - {"y": 3, "x": 1, "rev": "current"}, + {"y": 2, "x": 0, "rev": "workspace"}, + {"y": 3, "x": 1, "rev": "workspace"}, ], sort_keys=True, ) @@ -121,8 +129,8 @@ def test_plot_json_multiple_val(tmp_dir, scm, dvc): page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( [ - {"y": 2, "x": 0, "rev": "current"}, - {"y": 3, "x": 1, "rev": "current"}, + {"y": 2, "x": 0, "rev": "workspace"}, + {"y": 3, "x": 1, "rev": "workspace"}, ], sort_keys=True, ) @@ -144,8 +152,8 @@ def test_plot_confusion(tmp_dir, dvc): page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( [ - {"predicted": "B", "actual": "A", "rev": "current"}, - {"predicted": "A", "actual": "A", "rev": "current"}, + {"predicted": "B", "actual": "A", "rev": "workspace"}, + {"predicted": "A", "actual": "A", "rev": "workspace"}, ], sort_keys=True, ) @@ -156,7 +164,7 @@ def test_plot_confusion(tmp_dir, dvc): def test_plot_multiple_revs(tmp_dir, scm, dvc): shutil.copy( - fspath(tmp_dir / ".dvc" / "plot" / "default.dvct"), "template.dvct" + fspath(tmp_dir / ".dvc" / "plot" / "default.vt"), "template.vt" ) metric_1 = [{"x": 1, "y": 2}, {"x": 2, "y": 3}] @@ -173,7 +181,7 @@ def test_plot_multiple_revs(tmp_dir, scm, dvc): result = dvc.plot( "metric.json", - template="template.dvct", + template="template.vt", revisions=["HEAD", "v2", "v1"], file="result.html", ) @@ -237,28 +245,28 @@ def test_throw_on_no_metric_at_all(tmp_dir, scm, dvc, caplog): assert len(caplog.messages) == 0 assert ( - "Could not find 'metric.json' on any of the revisions: 'v1, current'" + "Could not find 'metric.json' on any of the revisions: 'v1, workspace'" == str(error.value) ) def test_custom_template(tmp_dir, scm, dvc): shutil.copy( - fspath(tmp_dir / ".dvc" / "plot" / "default.dvct"), - fspath(tmp_dir / "newtemplate.dvct"), + fspath(tmp_dir / ".dvc" / "plot" / "default.vt"), + fspath(tmp_dir / "newtemplate.vt"), ) metric = [{"a": 1, "b": 2}, {"a": 2, "b": 3}] _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "init", "v1") - result = dvc.plot("metric.json", "newtemplate.dvct") + result = dvc.plot("metric.json", "newtemplate.vt") page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( [ - {"a": 1, "b": 2, "rev": "current"}, - {"a": 2, "b": 3, "rev": "current"}, + {"a": 1, "b": 2, "rev": "workspace"}, + {"a": 2, "b": 3, "rev": "workspace"}, ], sort_keys=True, ) @@ -273,11 +281,11 @@ def _replace(path, src, dst): def test_custom_template_with_specified_data(tmp_dir, scm, dvc): shutil.copy( - fspath(tmp_dir / ".dvc" / "plot" / "default.dvct"), - fspath(tmp_dir / "newtemplate.dvct"), + fspath(tmp_dir / ".dvc" / "plot" / "default.vt"), + fspath(tmp_dir / "newtemplate.vt"), ) _replace( - tmp_dir / "newtemplate.dvct", + tmp_dir / "newtemplate.vt", "DVC_METRIC_DATA", "DVC_METRIC_DATA,metric.json", ) @@ -286,13 +294,13 @@ def test_custom_template_with_specified_data(tmp_dir, scm, dvc): _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "init", "v1") - result = dvc.plot(datafile=None, template="newtemplate.dvct") + result = dvc.plot(datafile=None, template="newtemplate.vt") page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( [ - {"a": 1, "b": 2, "rev": "current"}, - {"a": 2, "b": 3, "rev": "current"}, + {"a": 1, "b": 2, "rev": "workspace"}, + {"a": 2, "b": 3, "rev": "workspace"}, ], sort_keys=True, ) @@ -303,11 +311,11 @@ def test_custom_template_with_specified_data(tmp_dir, scm, dvc): def test_plot_override_specified_data_source(tmp_dir, scm, dvc): shutil.copy( - fspath(tmp_dir / ".dvc" / "plot" / "default.dvct"), - fspath(tmp_dir / "newtemplate.dvct"), + fspath(tmp_dir / ".dvc" / "plot" / "default.vt"), + fspath(tmp_dir / "newtemplate.vt"), ) _replace( - tmp_dir / "newtemplate.dvct", + tmp_dir / "newtemplate.vt", "DVC_METRIC_DATA", "DVC_METRIC_DATA,metric.json", ) @@ -316,13 +324,13 @@ def test_plot_override_specified_data_source(tmp_dir, scm, dvc): _write_json(tmp_dir, metric, "metric2.json") _run_with_metric(tmp_dir, "metric2.json", "init", "v1") - result = dvc.plot(datafile="metric2.json", template="newtemplate.dvct") + result = dvc.plot(datafile="metric2.json", template="newtemplate.vt") page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( [ - {"a": 1, "b": 2, "rev": "current"}, - {"a": 2, "b": 3, "rev": "current"}, + {"a": 1, "b": 2, "rev": "workspace"}, + {"a": 2, "b": 3, "rev": "workspace"}, ], sort_keys=True, ) @@ -335,18 +343,18 @@ def test_should_embed_vega_json_template(tmp_dir, scm, dvc): template = DefaultLinearTemplate.DEFAULT_CONTENT template["data"] = {"values": ""} - (tmp_dir / "template.dvct").write_text(json.dumps(template)) + (tmp_dir / "template.vt").write_text(json.dumps(template)) metric = [{"x": 1, "y": 2}, {"x": 2, "y": 3}] _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "init", "v1") - result = dvc.plot("metric.json", "template.dvct") + result = dvc.plot("metric.json", "template.vt") result_content = json.loads((tmp_dir / result).read_text()) vega_data = [ - {"x": 1, "y": 2, "rev": "current"}, - {"x": 2, "y": 3, "rev": "current"}, + {"x": 1, "y": 2, "rev": "workspace"}, + {"x": 2, "y": 3, "rev": "workspace"}, ] assert vega_data == result_content["data"]["values"] @@ -355,3 +363,18 @@ def test_should_embed_vega_json_template(tmp_dir, scm, dvc): def test_should_raise_on_no_template_and_datafile(tmp_dir, dvc): with pytest.raises(NoDataNorTemplateProvided): dvc.plot() + + +def test_should_raise_on_no_template(tmp_dir, dvc): + with pytest.raises(TemplateNotFound): + dvc.plot("metric.json", "non_existing_template.vt") + + +def test_plot_no_data(tmp_dir, dvc): + with pytest.raises(NoDataForTemplateError): + dvc.plot(template="default") + + +def test_plot_wrong_metric_type(tmp_dir, dvc): + with pytest.raises(PlotMetricTypeError): + dvc.plot(datafile="metric.txt") From 19e8d26b3626068db720df4a434be3217ccab4cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Tue, 14 Apr 2020 18:14:34 +0200 Subject: [PATCH 053/102] add yaml metrics support --- dvc/repo/plot.py | 55 +++++++++++++++++++++++++++-------------- tests/func/test_plot.py | 4 +-- 2 files changed, 38 insertions(+), 21 deletions(-) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index b7c9fe4672..fb63e20547 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -5,6 +5,7 @@ from collections import OrderedDict from funcy import first +from ruamel import yaml from dvc.exceptions import DvcException from dvc.plot import Template @@ -45,7 +46,6 @@ def __init__(self): super().__init__("Datafile or template is not specified.") -# TODO support yaml class PlotMetricTypeError(DvcException): def __init__(self, path): super().__init__( @@ -57,18 +57,29 @@ def __init__(self, path): WORKSPACE_REVISION_NAME = "workspace" -def _load_from_tree(tree, datafile, default_plot=False): - if datafile.endswith(".json"): - data = _parse_json(datafile, default_plot, tree) +def _parse(datafile, default_plot, tree, loading_function): + with tree.open(datafile, "r") as fobj: + data = loading_function(fobj) + assert isinstance(data, list) + if default_plot: + assert all(len(e) >= 1 for e in data) + last_key = list(first(data).keys())[-1] + data = [{"y": d[last_key], "x": i} for i, d in enumerate(data)] + return data - elif datafile.endswith(".csv"): - data = _parse_csv(datafile, default_plot, tree) - elif datafile.endswith(".tsv"): - data = _parse_csv(datafile, default_plot, tree, "\t") - else: - raise PlotMetricTypeError(datafile) - return data +def _parse_yaml(datafile, default_plot, tree): + def load_yaml(fobj): + return yaml.load(fobj) + + return _parse(datafile, default_plot, tree, load_yaml) + + +def _parse_json(datafile, default_plot, tree): + def load_json(fobj): + return json.load(fobj, object_pairs_hook=OrderedDict) + + return _parse(datafile, default_plot, tree, load_json) def _parse_csv(datafile, default_plot, tree, delimiter=","): @@ -93,14 +104,20 @@ def _parse_csv(datafile, default_plot, tree, delimiter=","): return data -def _parse_json(datafile, default_plot, tree): - with tree.open(datafile, "r") as fobj: - data = json.load(fobj, object_pairs_hook=OrderedDict) - assert isinstance(data, list) - if default_plot: - assert all(len(e) >= 1 for e in data) - last_key = list(first(data).keys())[-1] - data = [{"y": d[last_key], "x": i} for i, d in enumerate(data)] +def _load_from_tree(tree, datafile, default_plot=False): + filename = datafile.lower() + if filename.endswith(".json"): + data = _parse_json(datafile, default_plot, tree) + + elif filename.endswith(".csv"): + data = _parse_csv(datafile, default_plot, tree) + elif filename.endswith(".tsv"): + data = _parse_csv(datafile, default_plot, tree, "\t") + elif filename.endswith(".yaml"): + data = _parse_yaml(datafile, default_plot, tree) + else: + raise PlotMetricTypeError(datafile) + return data diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 468dc1c149..5b9383ea7d 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -75,8 +75,8 @@ def test_plot_csv_one_column(tmp_dir, scm, dvc): def test_plot_csv_multiple_columns(tmp_dir, scm, dvc): metric = [ - OrderedDict([("first_val", 100), ("val", 2)]), - OrderedDict([("first_val", 200), ("val", 3)]), + OrderedDict([("first_val", 100), ("second_val", 100), ("val", 2)]), + OrderedDict([("first_val", 200), ("second_val", 300), ("val", 3)]), ] _write_csv(metric, "metric.csv") _run_with_metric(tmp_dir, metric_filename="metric.csv") From accf07ca7a1977fac6459758bfa03354da6115d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Tue, 14 Apr 2020 18:16:41 +0200 Subject: [PATCH 054/102] fixup --- dvc/repo/plot.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index fb63e20547..9826be4f1d 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -108,7 +108,6 @@ def _load_from_tree(tree, datafile, default_plot=False): filename = datafile.lower() if filename.endswith(".json"): data = _parse_json(datafile, default_plot, tree) - elif filename.endswith(".csv"): data = _parse_csv(datafile, default_plot, tree) elif filename.endswith(".tsv"): From df45b48f1ac8e6444afee2b431b6561775452275 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Tue, 14 Apr 2020 20:11:41 +0200 Subject: [PATCH 055/102] some more suggestions --- dvc/plot.py | 24 ++------- dvc/repo/plot.py | 109 ++++++++++++++++++++++------------------ tests/func/test_plot.py | 3 +- 3 files changed, 67 insertions(+), 69 deletions(-) diff --git a/dvc/plot.py b/dvc/plot.py index f98cbf4ba7..8e025bf418 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -122,21 +122,7 @@ def get_datafile(placeholder_string): ) @staticmethod - def fill(template_path, data, priority_datafile=None, result_path=None): - if not result_path: - filename = os.path.basename(template_path) - filename.replace(Template.EXTENTION, ".html") - result_path = os.path.join(os.getcwd(), filename) - - result_content = Template._fill(template_path, data, priority_datafile) - - with open(result_path, "w") as fobj: - fobj.write(result_content) - - return result_path - - @staticmethod - def _fill(template_path, data, priority_datafile): + def fill(template_path, data, result_path, priority_datafile=None): with open(template_path, "r") as fobj: result_content = fobj.read() @@ -148,9 +134,6 @@ def _fill(template_path, data, priority_datafile): else: key = file - if not key: - raise NoDataForTemplateError(template_path) - result_content = result_content.replace( placeholder, json.dumps( @@ -160,7 +143,10 @@ def _fill(template_path, data, priority_datafile): sort_keys=True, ), ) - return result_content + with open(result_path, "w") as fobj: + fobj.write(result_content) + + return result_path class DefaultLinearTemplate(Template): diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 9826be4f1d..07f8c52c59 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -7,8 +7,8 @@ from funcy import first from ruamel import yaml -from dvc.exceptions import DvcException -from dvc.plot import Template +from dvc.exceptions import DvcException, PathMissingError +from dvc.plot import Template, NoDataForTemplateError from dvc.repo import locked logger = logging.getLogger(__name__) @@ -57,10 +57,8 @@ def __init__(self, path): WORKSPACE_REVISION_NAME = "workspace" -def _parse(datafile, default_plot, tree, loading_function): - with tree.open(datafile, "r") as fobj: - data = loading_function(fobj) - assert isinstance(data, list) +def _parse(data, default_plot): + assert isinstance(data, list) if default_plot: assert all(len(e) >= 1 for e in data) last_key = list(first(data).keys())[-1] @@ -68,52 +66,49 @@ def _parse(datafile, default_plot, tree, loading_function): return data -def _parse_yaml(datafile, default_plot, tree): - def load_yaml(fobj): - return yaml.load(fobj) +def _parse_yaml(fobj, default_plot): + data = yaml.load(fobj) - return _parse(datafile, default_plot, tree, load_yaml) + return _parse(data, default_plot) -def _parse_json(datafile, default_plot, tree): - def load_json(fobj): - return json.load(fobj, object_pairs_hook=OrderedDict) +def _parse_json(fobj, default_plot): + data = json.load(fobj, object_pairs_hook=OrderedDict) - return _parse(datafile, default_plot, tree, load_json) + return _parse(data, default_plot) -def _parse_csv(datafile, default_plot, tree, delimiter=","): - with tree.open(datafile, "r") as fobj: - if default_plot: - data = [] - for index, row in enumerate(csv.reader(fobj, delimiter=delimiter)): - assert len(row) >= 1 - if index == 0 and len(row) > 1: - # skip header - continue - data.append({"y": row[-1], "x": index}) - else: - data = [ - row - for row in ( - csv.DictReader( - fobj, skipinitialspace=True, delimiter=delimiter - ) +def _parse_csv(fobj, default_plot, delimiter=","): + if default_plot: + data = [] + for index, row in enumerate(csv.reader(fobj, delimiter=delimiter)): + assert len(row) >= 1 + if index == 0 and len(row) > 1: + # skip header + continue + data.append({"y": row[-1], "x": index}) + else: + data = [ + row + for row in ( + csv.DictReader( + fobj, skipinitialspace=True, delimiter=delimiter ) - ] + ) + ] return data -def _load_from_tree(tree, datafile, default_plot=False): +def _load_from(fobj, datafile, default_plot=False): filename = datafile.lower() if filename.endswith(".json"): - data = _parse_json(datafile, default_plot, tree) + data = _parse_json(fobj, default_plot) elif filename.endswith(".csv"): - data = _parse_csv(datafile, default_plot, tree) + data = _parse_csv(fobj, default_plot) elif filename.endswith(".tsv"): - data = _parse_csv(datafile, default_plot, tree, "\t") + data = _parse_csv(fobj, default_plot, "\t") elif filename.endswith(".yaml"): - data = _parse_yaml(datafile, default_plot, tree) + data = _parse_yaml(fobj, default_plot) else: raise PlotMetricTypeError(datafile) @@ -121,16 +116,24 @@ def _load_from_tree(tree, datafile, default_plot=False): def _load_from_revision(repo, datafile, revision, default_plot=False): - if revision is WORKSPACE_REVISION_NAME: - tree = repo.tree - else: - tree = repo.scm.get_tree(revision) - try: - data = _load_from_tree(tree, datafile, default_plot) - for d in data: - d["rev"] = revision - except FileNotFoundError: + if revision is WORKSPACE_REVISION_NAME: + + def open_datafile(): + return repo.tree.open(datafile, "r") + + else: + + def open_datafile(): + from dvc import api + + return api.open(datafile, repo.root_dir, revision) + + with open_datafile() as fobj: + data = _load_from(fobj, datafile, default_plot) + for d in data: + d["rev"] = revision + except (FileNotFoundError, PathMissingError): raise NoMetricOnRevisionError(datafile, revision) return data @@ -192,9 +195,17 @@ def plot(repo, datafile=None, template=None, revisions=None, file=None): for datafile in template_datafiles } - result_path = Template.fill( - template_path, data, datafile, result_path=file - ) + if not file: + if datafile: + file = datafile + else: + file = first(template_datafiles) + if not file: + raise NoDataForTemplateError(template_path) + + file += ".html" + + result_path = Template.fill(template_path, data, file, datafile) logger.info("file://{}".format(os.path.join(repo.root_dir, result_path))) return result_path diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 5b9383ea7d..e95534739f 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -375,6 +375,7 @@ def test_plot_no_data(tmp_dir, dvc): dvc.plot(template="default") -def test_plot_wrong_metric_type(tmp_dir, dvc): +def test_plot_wrong_metric_type(tmp_dir, scm, dvc): + tmp_dir.scm_gen("metric.txt", "content", commit="initial") with pytest.raises(PlotMetricTypeError): dvc.plot(datafile="metric.txt") From 263262a789717ccab9e98522ed256564e9106c84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Tue, 14 Apr 2020 20:23:39 +0200 Subject: [PATCH 056/102] default filename fix --- dvc/repo/plot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 07f8c52c59..dd7d2c387a 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -203,7 +203,7 @@ def plot(repo, datafile=None, template=None, revisions=None, file=None): if not file: raise NoDataForTemplateError(template_path) - file += ".html" + file = "".join(file.split(".")[:-1] or file) + ".html" result_path = Template.fill(template_path, data, file, datafile) logger.info("file://{}".format(os.path.join(repo.root_dir, result_path))) From 8d85e3d798288f64ea0e87db631acb0f29fbe162 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Wed, 15 Apr 2020 13:42:39 +0200 Subject: [PATCH 057/102] efiop review requests --- dvc/command/plot.py | 59 ++++++++++++++++++--------------- dvc/plot.py | 2 +- dvc/repo/plot.py | 11 +++--- tests/func/test_plot.py | 6 ++-- tests/unit/command/test_plot.py | 9 +++-- 5 files changed, 49 insertions(+), 38 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 5b577cf552..f91760bdc6 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -1,5 +1,6 @@ import argparse import logging +import os from dvc.command.base import append_doc_link, CmdBase, fix_subparsers from dvc.exceptions import DvcException @@ -8,36 +9,40 @@ logger = logging.getLogger(__name__) +def _run_plot(repo, datafile, template, revisions, file): + try: + result = repo.plot( + datafile=datafile, + template=template, + revisions=revisions, + file=file, + ) + except DvcException: + return 1 + logger.info("file://{}".format(os.path.join(repo.root_dir, result))) + return 0 + + class CmdPlotShow(CmdBase): def run(self): - try: - self.repo.plot( - datafile=self.args.datafile, - template=self.args.template, - file=self.args.file, - ) - - except DvcException: - logger.exception("plot show:") - return 1 - return 0 + return _run_plot( + self.repo, + self.args.datafile, + self.args.template, + None, + self.args.file, + ) class CmdPlotDiff(CmdBase): def run(self): - try: - self.repo.plot( - self.args.datafile, - self.args.template, - revisions=self.args.revisions, - file=self.args.file, - ) - - except DvcException: - logger.exception("plot diff:") - return 1 - - return 0 + return _run_plot( + self.repo, + self.args.datafile, + self.args.template, + self.args.revisions, + self.args.file, + ) def add_parser(subparsers, parent_parser): @@ -85,8 +90,10 @@ def add_parser(subparsers, parent_parser): ) plot_show_parser.set_defaults(func=CmdPlotShow) - PLOT_DIFF_HELP = "Plot changes in metrics between commits" - " in the DVC repository, or between a commit and the workspace." + PLOT_DIFF_HELP = ( + "Plot changes in metrics between commits" + " in the DVC repository, or between a commit and the workspace." + ) plot_diff_parser = plot_subparsers.add_parser( "diff", parents=[parent_parser], diff --git a/dvc/plot.py b/dvc/plot.py index 8e025bf418..975b2de58f 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -16,7 +16,7 @@ class TemplateNotFound(DvcException): def __init__(self, path): - super().__init__("Template: '{}' not found.".format(path)) + super().__init__("Template '{}' not found.".format(path)) class NoDataForTemplateError(DvcException): diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index dd7d2c387a..351ddcf5e3 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -17,7 +17,7 @@ class NoMetricInHistoryError(DvcException): def __init__(self, path, revisions): super().__init__( - "Could not find '{}' on any of the revisions: " + "Could not find '{}' on any of the revisions " "'{}'".format(path, ", ".join(revisions)) ) @@ -27,7 +27,7 @@ def __init__(self, path, revision): self.path = path self.revision = revision super().__init__( - "Could not find '{}' on revision: " "'{}'".format(path, revision) + "Could not find '{}' on revision " "'{}'".format(path, revision) ) @@ -41,7 +41,7 @@ def __init__(self, datafile, template_datafiles): ) -class NoDataNorTemplateProvided(DvcException): +class NoDataOrTemplateProvided(DvcException): def __init__(self): super().__init__("Datafile or template is not specified.") @@ -159,7 +159,7 @@ def _load_from_revisions(repo, datafile, revisions, default_plot=False): if not data and exceptions: raise NoMetricInHistoryError(datafile, revisions) - elif exceptions: + else: for e in exceptions: logger.warning( "File '{}' was not found at: '{}'. It will not be " @@ -183,7 +183,7 @@ def plot(repo, datafile=None, template=None, revisions=None, file=None): revisions = [] if not datafile and not template: - raise NoDataNorTemplateProvided() + raise NoDataOrTemplateProvided() template_path = _evaluate_templatepath(repo, template) @@ -206,7 +206,6 @@ def plot(repo, datafile=None, template=None, revisions=None, file=None): file = "".join(file.split(".")[:-1] or file) + ".html" result_path = Template.fill(template_path, data, file, datafile) - logger.info("file://{}".format(os.path.join(repo.root_dir, result_path))) return result_path diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index e95534739f..53b2da0ce7 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -16,7 +16,7 @@ ) from dvc.repo.plot import ( NoMetricInHistoryError, - NoDataNorTemplateProvided, + NoDataOrTemplateProvided, PlotMetricTypeError, ) @@ -245,7 +245,7 @@ def test_throw_on_no_metric_at_all(tmp_dir, scm, dvc, caplog): assert len(caplog.messages) == 0 assert ( - "Could not find 'metric.json' on any of the revisions: 'v1, workspace'" + "Could not find 'metric.json' on any of the revisions 'v1, workspace'" == str(error.value) ) @@ -361,7 +361,7 @@ def test_should_embed_vega_json_template(tmp_dir, scm, dvc): def test_should_raise_on_no_template_and_datafile(tmp_dir, dvc): - with pytest.raises(NoDataNorTemplateProvided): + with pytest.raises(NoDataOrTemplateProvided): dvc.plot() diff --git a/tests/unit/command/test_plot.py b/tests/unit/command/test_plot.py index 3303bbee76..5374b4f5cf 100644 --- a/tests/unit/command/test_plot.py +++ b/tests/unit/command/test_plot.py @@ -23,14 +23,15 @@ def test_metrics_diff(dvc, mocker): cmd = cli_args.func(cli_args) m = mocker.patch.object(cmd.repo, "plot", autospec=True) + mocker.patch("os.path.join") assert cmd.run() == 0 m.assert_called_once_with( datafile="datafile", template="template", - file="result.extension", revisions=["HEAD", "tag1", "tag2"], + file="result.extension", ) @@ -51,9 +52,13 @@ def test_metrics_show(dvc, mocker): cmd = cli_args.func(cli_args) m = mocker.patch.object(cmd.repo, "plot", autospec=True) + mocker.patch("os.path.join") assert cmd.run() == 0 m.assert_called_once_with( - datafile="datafile", template="template", file="result.extension", + datafile="datafile", + template="template", + file="result.extension", + revisions=None, ) From 29881682a2c0ea1f2be72289be378b4d2a9b000b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Wed, 15 Apr 2020 14:01:59 +0200 Subject: [PATCH 058/102] log exception on failur --- dvc/command/plot.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index f91760bdc6..5b99e36150 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -18,6 +18,7 @@ def _run_plot(repo, datafile, template, revisions, file): file=file, ) except DvcException: + logger.exception("") return 1 logger.info("file://{}".format(os.path.join(repo.root_dir, result))) return 0 From 4e32431096d1ef69efa148167171b0f866345b63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Wed, 15 Apr 2020 15:23:20 +0200 Subject: [PATCH 059/102] move revisions deduction to commands --- dvc/command/plot.py | 64 ++++++++++++++++----------------- dvc/plot.py | 4 ++- dvc/repo/plot.py | 11 ++---- tests/func/test_plot.py | 4 +-- tests/unit/command/test_plot.py | 25 +++++++++++-- tests/unit/repo/test_plot.py | 62 -------------------------------- 6 files changed, 63 insertions(+), 107 deletions(-) delete mode 100644 tests/unit/repo/test_plot.py diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 5b99e36150..f5a07e4cbf 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -4,46 +4,46 @@ from dvc.command.base import append_doc_link, CmdBase, fix_subparsers from dvc.exceptions import DvcException +from dvc.repo.plot import WORKSPACE_REVISION_NAME from dvc.utils import format_link logger = logging.getLogger(__name__) -def _run_plot(repo, datafile, template, revisions, file): - try: - result = repo.plot( - datafile=datafile, - template=template, - revisions=revisions, - file=file, - ) - except DvcException: - logger.exception("") - return 1 - logger.info("file://{}".format(os.path.join(repo.root_dir, result))) - return 0 - +class CmdPLot(CmdBase): + def _revisions(self): + raise NotImplementedError -class CmdPlotShow(CmdBase): def run(self): - return _run_plot( - self.repo, - self.args.datafile, - self.args.template, - None, - self.args.file, + try: + result = self.repo.plot( + datafile=self.args.datafile, + template=self.args.template, + revisions=self._revisions(), + file=self.args.file, + ) + except DvcException: + logger.exception("") + return 1 + logger.info( + "file://{}".format(os.path.join(self.repo.root_dir, result)) ) + return 0 -class CmdPlotDiff(CmdBase): - def run(self): - return _run_plot( - self.repo, - self.args.datafile, - self.args.template, - self.args.revisions, - self.args.file, - ) +class CmdPlotShow(CmdPLot): + def _revisions(self): + return None + + +class CmdPlotDiff(CmdPLot): + def _revisions(self): + revisions = self.args.revisions or [] + if len(revisions) <= 1: + if len(revisions) == 0 and self.repo.scm.is_dirty(): + revisions.append("HEAD") + revisions.append(WORKSPACE_REVISION_NAME) + return revisions def add_parser(subparsers, parent_parser): @@ -87,7 +87,7 @@ def add_parser(subparsers, parent_parser): "datafile", nargs="?", default=None, help="Data to be visualized." ) plot_show_parser.add_argument( - "-f", "--file", help="Specify name of the file it generates." + "-f", "--file", help="Name of the generated file." ) plot_show_parser.set_defaults(func=CmdPlotShow) @@ -117,7 +117,7 @@ def add_parser(subparsers, parent_parser): help="Data to be visualized.", ) plot_diff_parser.add_argument( - "-f", "--file", help="Specify name of the file it generates." + "-f", "--file", help="Name of the generated file." ) plot_diff_parser.add_argument( "revisions", diff --git a/dvc/plot.py b/dvc/plot.py index 975b2de58f..478c2df08a 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -21,7 +21,9 @@ def __init__(self, path): class NoDataForTemplateError(DvcException): def __init__(self, template_path): - super().__init__("No data provided for '{}'.".format(template_path)) + super().__init__( + "No data provided for '{}'.".format(os.path.relpath(template_path)) + ) PAGE_HTML = """ diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 351ddcf5e3..4f9026382b 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -47,10 +47,10 @@ def __init__(self): class PlotMetricTypeError(DvcException): - def __init__(self, path): + def __init__(self, file): super().__init__( "'{}' - file type error\n" - "Only json, yaml, csv and tsv types are supported.".format(path) + "Only json, yaml, csv and tsv types are supported.".format(file) ) @@ -142,11 +142,6 @@ def _load_from_revisions(repo, datafile, revisions, default_plot=False): data = [] exceptions = [] - if len(revisions) <= 1: - if len(revisions) == 0 and repo.scm.is_dirty(): - revisions.append("HEAD") - revisions.append(WORKSPACE_REVISION_NAME) - for rev in revisions: try: data.extend( @@ -180,7 +175,7 @@ def _evaluate_templatepath(repo, template=None): @locked def plot(repo, datafile=None, template=None, revisions=None, file=None): if revisions is None: - revisions = [] + revisions = [WORKSPACE_REVISION_NAME] if not datafile and not template: raise NoDataOrTemplateProvided() diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 53b2da0ce7..f0bfed6375 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -245,8 +245,8 @@ def test_throw_on_no_metric_at_all(tmp_dir, scm, dvc, caplog): assert len(caplog.messages) == 0 assert ( - "Could not find 'metric.json' on any of the revisions 'v1, workspace'" - == str(error.value) + str(error.value) + == "Could not find 'metric.json' on any of the revisions 'v1'" ) diff --git a/tests/unit/command/test_plot.py b/tests/unit/command/test_plot.py index 5374b4f5cf..86ccfe3994 100644 --- a/tests/unit/command/test_plot.py +++ b/tests/unit/command/test_plot.py @@ -1,8 +1,10 @@ +import pytest + from dvc.cli import parse_args from dvc.command.plot import CmdPlotShow, CmdPlotDiff -def test_metrics_diff(dvc, mocker): +def test_metrics_diff(mocker): cli_args = parse_args( [ "plot", @@ -35,7 +37,7 @@ def test_metrics_diff(dvc, mocker): ) -def test_metrics_show(dvc, mocker): +def test_metrics_show(mocker): cli_args = parse_args( [ "plot", @@ -62,3 +64,22 @@ def test_metrics_show(dvc, mocker): file="result.extension", revisions=None, ) + + +@pytest.mark.parametrize( + "arg_revisions,is_dirty,expected_revisions", + [ + ([], False, ["workspace"]), + ([], True, ["HEAD", "workspace"]), + (["v1", "v2", "workspace"], False, ["v1", "v2", "workspace"]), + (["v1", "v2", "workspace"], True, ["v1", "v2", "workspace"]), + ], +) +def test_revisions(mocker, arg_revisions, is_dirty, expected_revisions): + args = mocker.MagicMock() + + cmd = CmdPlotDiff(args) + mocker.patch.object(args, "revisions", arg_revisions) + mocker.patch.object(cmd.repo.scm, "is_dirty", return_value=is_dirty) + + assert cmd._revisions() == expected_revisions diff --git a/tests/unit/repo/test_plot.py b/tests/unit/repo/test_plot.py deleted file mode 100644 index 2c64412a87..0000000000 --- a/tests/unit/repo/test_plot.py +++ /dev/null @@ -1,62 +0,0 @@ -from dvc.repo.plot import _load_from_revisions, WORKSPACE_REVISION_NAME - - -def test_load_no_revisions_clean(mocker): - m = mocker.patch("dvc.repo.plot._load_from_revision") - repo = mocker.MagicMock() - repo.scm.is_dirty.return_value = False - - _load_from_revisions(repo, "datafile", [], False) - - assert m.call_count == 1 - assert m.call_args_list[0] == mocker.call( - repo, "datafile", WORKSPACE_REVISION_NAME, default_plot=False - ) - - -def test_load_no_revisions_dirty(mocker): - m = mocker.patch("dvc.repo.plot._load_from_revision") - repo = mocker.MagicMock() - repo.scm.is_dirty.return_value = True - - _load_from_revisions(repo, "datafile", [], False) - - assert m.call_count == 2 - assert m.call_args_list[0] == mocker.call( - repo, "datafile", "HEAD", default_plot=False - ) - assert m.call_args_list[1] == mocker.call( - repo, "datafile", WORKSPACE_REVISION_NAME, default_plot=False - ) - - -def test_load_one(mocker): - m = mocker.patch("dvc.repo.plot._load_from_revision") - repo = mocker.MagicMock() - repo.scm.is_dirty.return_value = True - - _load_from_revisions(repo, "datafile", ["tag1"], False) - - assert m.call_count == 2 - assert m.call_args_list[0] == mocker.call( - repo, "datafile", "tag1", default_plot=False - ) - assert m.call_args_list[1] == mocker.call( - repo, "datafile", WORKSPACE_REVISION_NAME, default_plot=False - ) - - -def test_load_more(mocker): - m = mocker.patch("dvc.repo.plot._load_from_revision") - repo = mocker.MagicMock() - repo.scm.is_dirty.return_value = True - - _load_from_revisions(repo, "datafile", ["tag1", "tag2"], False) - - assert m.call_count == 2 - assert m.call_args_list[0] == mocker.call( - repo, "datafile", "tag1", default_plot=False - ) - assert m.call_args_list[1] == mocker.call( - repo, "datafile", "tag2", default_plot=False - ) From 28ae4d9ae3b9e981196d69b9f05d1c9e2aa49a15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Thu, 16 Apr 2020 14:07:12 +0200 Subject: [PATCH 060/102] json templates --- dvc/command/plot.py | 14 +++++++++ dvc/plot.py | 35 +++++++++++++++++------ dvc/repo/plot.py | 8 ++++-- tests/func/test_plot.py | 50 ++++++++++++++++++--------------- tests/unit/command/test_plot.py | 4 +++ 5 files changed, 77 insertions(+), 34 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index f5a07e4cbf..a6a9799429 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -21,6 +21,7 @@ def run(self): template=self.args.template, revisions=self._revisions(), file=self.args.file, + embed=not self.args.no_embed, ) except DvcException: logger.exception("") @@ -89,6 +90,12 @@ def add_parser(subparsers, parent_parser): plot_show_parser.add_argument( "-f", "--file", help="Name of the generated file." ) + plot_show_parser.add_argument( + "--no-embed", + action="store_true", + default=False, + help="Do not wrap vega plot json with HTML.", + ) plot_show_parser.set_defaults(func=CmdPlotShow) PLOT_DIFF_HELP = ( @@ -125,4 +132,11 @@ def add_parser(subparsers, parent_parser): default=None, help="Git revisions to plot from", ) + + plot_diff_parser.add_argument( + "--no-embed", + action="store_true", + default=False, + help="Do not wrap vega plot json with HTML.", + ) plot_diff_parser.set_defaults(func=CmdPlotDiff) diff --git a/dvc/plot.py b/dvc/plot.py index 478c2df08a..2b17853147 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -64,7 +64,7 @@ def _prepare_div(vega_dict): class Template: INDENT = 4 SEPARATORS = (",", ": ") - EXTENTION = ".vt" + EXTENSION = ".json" METRIC_DATA_STRING = "" def __init__(self, templates_dir): @@ -76,14 +76,18 @@ def dump(self): if not os.path.exists(self.plot_templates_dir): makedirs(self.plot_templates_dir) - div = _prepare_div(self.DEFAULT_CONTENT) - - _save_plot_html( - [div], + with open( os.path.join( - self.plot_templates_dir, self.TEMPLATE_NAME + self.EXTENTION + self.plot_templates_dir, self.TEMPLATE_NAME + self.EXTENSION ), - ) + "w", + ) as fobj: + json.dump( + self.DEFAULT_CONTENT, + fobj, + indent=self.INDENT, + separators=self.SEPARATORS, + ) def load_template(self, path): try: @@ -124,7 +128,9 @@ def get_datafile(placeholder_string): ) @staticmethod - def fill(template_path, data, result_path, priority_datafile=None): + def fill( + template_path, data, result_path, priority_datafile=None, embed=True + ): with open(template_path, "r") as fobj: result_content = fobj.read() @@ -145,11 +151,22 @@ def fill(template_path, data, result_path, priority_datafile=None): sort_keys=True, ), ) + + if embed: + result_content = Template._embed(result_content) + with open(result_path, "w") as fobj: fobj.write(result_content) return result_path + @staticmethod + def _embed(vega_json_string): + # TODO what about id? + # TODO 2 what about supporting multiple plots, eg as json list? + div = DIV_HTML.format(id="dvc_plot", vega_json=vega_json_string) + return PAGE_HTML.format(divs=div) + class DefaultLinearTemplate(Template): TEMPLATE_NAME = "default" @@ -195,7 +212,7 @@ def templates_dir(self): @cached_property def default_template(self): - return os.path.join(self.templates_dir, "default.vt") + return os.path.join(self.templates_dir, "default.json") def get_template(self, path): t_path = os.path.join(self.templates_dir, path) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 4f9026382b..38bb905034 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -173,7 +173,9 @@ def _evaluate_templatepath(repo, template=None): @locked -def plot(repo, datafile=None, template=None, revisions=None, file=None): +def plot( + repo, datafile=None, template=None, revisions=None, file=None, embed=False +): if revisions is None: revisions = [WORKSPACE_REVISION_NAME] @@ -200,7 +202,9 @@ def plot(repo, datafile=None, template=None, revisions=None, file=None): file = "".join(file.split(".")[:-1] or file) + ".html" - result_path = Template.fill(template_path, data, file, datafile) + result_path = Template.fill( + template_path, data, file, datafile, embed=embed + ) return result_path diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index f0bfed6375..7537a747bb 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -59,7 +59,8 @@ def test_plot_csv_one_column(tmp_dir, scm, dvc): _write_csv(metric, "metric.csv") _run_with_metric(tmp_dir, metric_filename="metric.csv") - result = dvc.plot("metric.csv") + result = dvc.plot("metric.csv", embed=True) + page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( [ @@ -81,7 +82,7 @@ def test_plot_csv_multiple_columns(tmp_dir, scm, dvc): _write_csv(metric, "metric.csv") _run_with_metric(tmp_dir, metric_filename="metric.csv") - result = dvc.plot("metric.csv") + result = dvc.plot("metric.csv", embed=True) page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( [ @@ -101,7 +102,7 @@ def test_plot_json_single_val(tmp_dir, scm, dvc): _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "first run") - result = dvc.plot("metric.json") + result = dvc.plot("metric.json", embed=True) page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( @@ -124,7 +125,7 @@ def test_plot_json_multiple_val(tmp_dir, scm, dvc): _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "first run") - result = dvc.plot("metric.json") + result = dvc.plot("metric.json", embed=True) page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( @@ -147,7 +148,7 @@ def test_plot_confusion(tmp_dir, dvc): _write_json(tmp_dir, confusion_matrix, "metric.json") _run_with_metric(tmp_dir, "metric.json", "first run") - result = dvc.plot(datafile="metric.json", template="confusion") + result = dvc.plot(datafile="metric.json", template="confusion", embed=True) page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( @@ -164,7 +165,7 @@ def test_plot_confusion(tmp_dir, dvc): def test_plot_multiple_revs(tmp_dir, scm, dvc): shutil.copy( - fspath(tmp_dir / ".dvc" / "plot" / "default.vt"), "template.vt" + fspath(tmp_dir / ".dvc" / "plot" / "default.json"), "template.json" ) metric_1 = [{"x": 1, "y": 2}, {"x": 2, "y": 3}] @@ -181,9 +182,10 @@ def test_plot_multiple_revs(tmp_dir, scm, dvc): result = dvc.plot( "metric.json", - template="template.vt", + template="template.json", revisions=["HEAD", "v2", "v1"], file="result.html", + embed=True, ) page_content = BeautifulSoup((tmp_dir / result).read_text()) @@ -213,7 +215,7 @@ def test_plot_even_if_metric_missing(tmp_dir, scm, dvc, caplog): caplog.clear() with caplog.at_level(logging.WARNING, "dvc"): - result = dvc.plot("metric.json", revisions=["v1", "v2"]) + result = dvc.plot("metric.json", revisions=["v1", "v2"], embed=True) assert ( first(caplog.messages) == "File 'metric.json' was not found at: 'v1'. It will not be plotted." @@ -239,7 +241,7 @@ def test_throw_on_no_metric_at_all(tmp_dir, scm, dvc, caplog): with pytest.raises(NoMetricInHistoryError) as error, caplog.at_level( logging.WARNING, "dvc" ): - dvc.plot("metric.json", revisions=["v1"]) + dvc.plot("metric.json", revisions=["v1"], embed=True) # do not warn if none found assert len(caplog.messages) == 0 @@ -252,15 +254,15 @@ def test_throw_on_no_metric_at_all(tmp_dir, scm, dvc, caplog): def test_custom_template(tmp_dir, scm, dvc): shutil.copy( - fspath(tmp_dir / ".dvc" / "plot" / "default.vt"), - fspath(tmp_dir / "newtemplate.vt"), + fspath(tmp_dir / ".dvc" / "plot" / "default.json"), + fspath(tmp_dir / "newtemplate.json"), ) metric = [{"a": 1, "b": 2}, {"a": 2, "b": 3}] _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "init", "v1") - result = dvc.plot("metric.json", "newtemplate.vt") + result = dvc.plot("metric.json", "newtemplate.json", embed=True) page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( @@ -281,11 +283,11 @@ def _replace(path, src, dst): def test_custom_template_with_specified_data(tmp_dir, scm, dvc): shutil.copy( - fspath(tmp_dir / ".dvc" / "plot" / "default.vt"), - fspath(tmp_dir / "newtemplate.vt"), + fspath(tmp_dir / ".dvc" / "plot" / "default.json"), + fspath(tmp_dir / "newtemplate.json"), ) _replace( - tmp_dir / "newtemplate.vt", + tmp_dir / "newtemplate.json", "DVC_METRIC_DATA", "DVC_METRIC_DATA,metric.json", ) @@ -294,7 +296,7 @@ def test_custom_template_with_specified_data(tmp_dir, scm, dvc): _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "init", "v1") - result = dvc.plot(datafile=None, template="newtemplate.vt") + result = dvc.plot(datafile=None, template="newtemplate.json", embed=True) page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( @@ -311,11 +313,11 @@ def test_custom_template_with_specified_data(tmp_dir, scm, dvc): def test_plot_override_specified_data_source(tmp_dir, scm, dvc): shutil.copy( - fspath(tmp_dir / ".dvc" / "plot" / "default.vt"), - fspath(tmp_dir / "newtemplate.vt"), + fspath(tmp_dir / ".dvc" / "plot" / "default.json"), + fspath(tmp_dir / "newtemplate.json"), ) _replace( - tmp_dir / "newtemplate.vt", + tmp_dir / "newtemplate.json", "DVC_METRIC_DATA", "DVC_METRIC_DATA,metric.json", ) @@ -324,7 +326,9 @@ def test_plot_override_specified_data_source(tmp_dir, scm, dvc): _write_json(tmp_dir, metric, "metric2.json") _run_with_metric(tmp_dir, "metric2.json", "init", "v1") - result = dvc.plot(datafile="metric2.json", template="newtemplate.vt") + result = dvc.plot( + datafile="metric2.json", template="newtemplate.json", embed=True + ) page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( @@ -343,13 +347,13 @@ def test_should_embed_vega_json_template(tmp_dir, scm, dvc): template = DefaultLinearTemplate.DEFAULT_CONTENT template["data"] = {"values": ""} - (tmp_dir / "template.vt").write_text(json.dumps(template)) + (tmp_dir / "template.json").write_text(json.dumps(template)) metric = [{"x": 1, "y": 2}, {"x": 2, "y": 3}] _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "init", "v1") - result = dvc.plot("metric.json", "template.vt") + result = dvc.plot("metric.json", "template.json", embed=False) result_content = json.loads((tmp_dir / result).read_text()) vega_data = [ @@ -367,7 +371,7 @@ def test_should_raise_on_no_template_and_datafile(tmp_dir, dvc): def test_should_raise_on_no_template(tmp_dir, dvc): with pytest.raises(TemplateNotFound): - dvc.plot("metric.json", "non_existing_template.vt") + dvc.plot("metric.json", "non_existing_template.json") def test_plot_no_data(tmp_dir, dvc): diff --git a/tests/unit/command/test_plot.py b/tests/unit/command/test_plot.py index 86ccfe3994..c8678190ad 100644 --- a/tests/unit/command/test_plot.py +++ b/tests/unit/command/test_plot.py @@ -15,6 +15,7 @@ def test_metrics_diff(mocker): "template", "-d", "datafile", + "--no-embed", "HEAD", "tag1", "tag2", @@ -34,6 +35,7 @@ def test_metrics_diff(mocker): template="template", revisions=["HEAD", "tag1", "tag2"], file="result.extension", + embed=False, ) @@ -46,6 +48,7 @@ def test_metrics_show(mocker): "result.extension", "-t", "template", + "--no-embed", "datafile", ] ) @@ -63,6 +66,7 @@ def test_metrics_show(mocker): template="template", file="result.extension", revisions=None, + embed=False, ) From ff648d8f339da9e22d15288e71b18a0ea382f523 Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 16 Apr 2020 16:50:24 +0200 Subject: [PATCH 061/102] extract template filling to separate method --- dvc/command/plot.py | 2 +- dvc/repo/__init__.py | 2 +- dvc/repo/plot.py | 80 +++++++++++++++++++++++++++--------- dvc/{plot.py => template.py} | 59 +++----------------------- tests/func/test_plot.py | 20 +++++---- 5 files changed, 78 insertions(+), 85 deletions(-) rename dvc/{plot.py => template.py} (78%) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index a6a9799429..9fc1d8570b 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -20,7 +20,7 @@ def run(self): datafile=self.args.datafile, template=self.args.template, revisions=self._revisions(), - file=self.args.file, + fname=self.args.file, embed=not self.args.no_embed, ) except DvcException: diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index 0165c794ff..f4740d3b6f 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -16,7 +16,7 @@ OutputNotFoundError, ) from dvc.path_info import PathInfo -from dvc.plot import PlotTemplates +from dvc.template import PlotTemplates from dvc.remote.base import RemoteActionNotImplemented from dvc.utils.fs import path_isin from .graph import check_acyclic, get_pipeline, get_pipelines diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 38bb905034..a842053ddd 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -8,11 +8,29 @@ from ruamel import yaml from dvc.exceptions import DvcException, PathMissingError -from dvc.plot import Template, NoDataForTemplateError +from dvc.template import Template from dvc.repo import locked logger = logging.getLogger(__name__) +PAGE_HTML = """ + + dvc plot + + + + + + {divs} + +""" + +DIV_HTML = """
+""" + class NoMetricInHistoryError(DvcException): def __init__(self, path, revisions): @@ -173,8 +191,20 @@ def _evaluate_templatepath(repo, template=None): @locked +def fill_template(repo, datafile, template_path, revisions): + default_plot = template_path == repo.plot_templates.default_template + + template_datafiles = _parse_template(template_path, datafile) + + data = { + datafile: _load_from_revisions(repo, datafile, revisions, default_plot) + for datafile in template_datafiles + } + return Template.fill(template_path, data, datafile) + + def plot( - repo, datafile=None, template=None, revisions=None, file=None, embed=False + repo, datafile=None, template=None, revisions=None, fname=None, embed=False ): if revisions is None: revisions = [WORKSPACE_REVISION_NAME] @@ -184,28 +214,38 @@ def plot( template_path = _evaluate_templatepath(repo, template) - default_plot = template_path == repo.plot_templates.default_template + plot_content = fill_template(repo, datafile, template_path, revisions) - template_datafiles = _parse_template(template_path, datafile) - data = { - datafile: _load_from_revisions(repo, datafile, revisions, default_plot) - for datafile in template_datafiles - } + if embed: + div = DIV_HTML.format(id="plot", vega_json=plot_content) + plot_content = PAGE_HTML.format(divs=div) - if not file: - if datafile: - file = datafile - else: - file = first(template_datafiles) - if not file: - raise NoDataForTemplateError(template_path) + if not fname: + fname = _infer_result_file(datafile, template_path, embed) + + with open(fname, "w") as fobj: + fobj.write(plot_content) + return fname - file = "".join(file.split(".")[:-1] or file) + ".html" - result_path = Template.fill( - template_path, data, file, datafile, embed=embed - ) - return result_path +def _infer_result_file(datafile, template_path, embed): + if datafile: + tmp = datafile + else: + tmp = "plot" + + if not embed: + extension = os.path.splitext(template_path)[1] + else: + extension = ".html" + + result_file = os.path.splitext(tmp)[0] + extension + + if result_file == datafile or result_file == template_path: + raise DvcException( + "Could not infer plot name, please provide it " "with -f option." + ) + return result_file def _parse_template(template_path, datafile): diff --git a/dvc/plot.py b/dvc/template.py similarity index 78% rename from dvc/plot.py rename to dvc/template.py index 2b17853147..d5a35ad46a 100644 --- a/dvc/plot.py +++ b/dvc/template.py @@ -1,9 +1,7 @@ import json import logging import os -import random import re -import string from funcy import cached_property @@ -26,41 +24,6 @@ def __init__(self, template_path): ) -PAGE_HTML = """ - - dvc plot - - - - - - {divs} - -""" - -DIV_HTML = """
-""" - - -def _save_plot_html(divs, path): - page = PAGE_HTML.format(divs="\n".join(divs)) - with open(path, "w") as fobj: - fobj.write(page) - - -def _prepare_div(vega_dict): - div_id = "".join(random.sample(string.ascii_lowercase, 8)) - return DIV_HTML.format( - id=str(div_id), - vega_json=json.dumps( - vega_dict, indent=4, separators=(",", ": "), sort_keys=True - ), - ) - - class Template: INDENT = 4 SEPARATORS = (",", ": ") @@ -128,9 +91,7 @@ def get_datafile(placeholder_string): ) @staticmethod - def fill( - template_path, data, result_path, priority_datafile=None, embed=True - ): + def fill(template_path, data, priority_datafile=None): with open(template_path, "r") as fobj: result_content = fobj.read() @@ -142,6 +103,9 @@ def fill( else: key = file + if key not in data: + raise NoDataForTemplateError(template_path) + result_content = result_content.replace( placeholder, json.dumps( @@ -152,20 +116,7 @@ def fill( ), ) - if embed: - result_content = Template._embed(result_content) - - with open(result_path, "w") as fobj: - fobj.write(result_content) - - return result_path - - @staticmethod - def _embed(vega_json_string): - # TODO what about id? - # TODO 2 what about supporting multiple plots, eg as json list? - div = DIV_HTML.format(id="dvc_plot", vega_json=vega_json_string) - return PAGE_HTML.format(divs=div) + return result_content class DefaultLinearTemplate(Template): diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 7537a747bb..e8b965d882 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -9,7 +9,7 @@ from funcy import first from dvc.compat import fspath -from dvc.plot import ( +from dvc.template import ( DefaultLinearTemplate, TemplateNotFound, NoDataForTemplateError, @@ -184,7 +184,7 @@ def test_plot_multiple_revs(tmp_dir, scm, dvc): "metric.json", template="template.json", revisions=["HEAD", "v2", "v1"], - file="result.html", + fname="result.html", embed=True, ) @@ -216,10 +216,10 @@ def test_plot_even_if_metric_missing(tmp_dir, scm, dvc, caplog): caplog.clear() with caplog.at_level(logging.WARNING, "dvc"): result = dvc.plot("metric.json", revisions=["v1", "v2"], embed=True) - assert ( - first(caplog.messages) - == "File 'metric.json' was not found at: 'v1'. It will not be plotted." - ) + assert ( + "File 'metric.json' was not found at: 'v1'. " + "It will not be plotted." in caplog.text + ) page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( @@ -243,8 +243,8 @@ def test_throw_on_no_metric_at_all(tmp_dir, scm, dvc, caplog): ): dvc.plot("metric.json", revisions=["v1"], embed=True) - # do not warn if none found - assert len(caplog.messages) == 0 + # do not warn if none found + assert len(caplog.messages) == 0 assert ( str(error.value) @@ -353,7 +353,9 @@ def test_should_embed_vega_json_template(tmp_dir, scm, dvc): _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "init", "v1") - result = dvc.plot("metric.json", "template.json", embed=False) + result = dvc.plot( + "metric.json", "template.json", fname="result.json", embed=False + ) result_content = json.loads((tmp_dir / result).read_text()) vega_data = [ From 7e00009224a86c18f160d70d9ab371100bcddca8 Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 17 Apr 2020 16:21:47 +0200 Subject: [PATCH 062/102] some parsing improvements --- dvc/repo/plot.py | 70 ++++++++++++++++++--------------- tests/unit/command/test_plot.py | 4 +- 2 files changed, 40 insertions(+), 34 deletions(-) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index a842053ddd..37ebb99571 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -1,4 +1,5 @@ import csv +import io import json import logging import os @@ -117,43 +118,40 @@ def _parse_csv(fobj, default_plot, delimiter=","): return data -def _load_from(fobj, datafile, default_plot=False): - filename = datafile.lower() - if filename.endswith(".json"): - data = _parse_json(fobj, default_plot) - elif filename.endswith(".csv"): - data = _parse_csv(fobj, default_plot) - elif filename.endswith(".tsv"): - data = _parse_csv(fobj, default_plot, "\t") - elif filename.endswith(".yaml"): - data = _parse_yaml(fobj, default_plot) - else: - raise PlotMetricTypeError(datafile) - - return data +def parse(datafile, content, default_plot=False): + _, extension = os.path.splitext(datafile.lower()) + if extension == ".json": + return _parse_json(io.StringIO(content), default_plot) + elif extension == ".csv": + return _parse_csv(io.StringIO(content), default_plot) + elif extension == ".tsv": + return _parse_csv(io.StringIO(content), default_plot, "\t") + elif extension == ".yaml": + return _parse_yaml(io.StringIO(content), default_plot) + raise PlotMetricTypeError(datafile) -def _load_from_revision(repo, datafile, revision, default_plot=False): - try: - if revision is WORKSPACE_REVISION_NAME: +def _load_from_revision(repo, datafile, revision): + if revision is WORKSPACE_REVISION_NAME: - def open_datafile(): - return repo.tree.open(datafile, "r") + def open_datafile(): + return repo.tree.open(datafile, "r") - else: + else: - def open_datafile(): - from dvc import api + def open_datafile(): + from dvc import api - return api.open(datafile, repo.root_dir, revision) + return api.open(datafile, repo.root_dir, revision) + try: with open_datafile() as fobj: - data = _load_from(fobj, datafile, default_plot) - for d in data: - d["rev"] = revision + datafile_content = fobj.read() + except (FileNotFoundError, PathMissingError): raise NoMetricOnRevisionError(datafile, revision) - return data + + return datafile_content def _load_from_revisions(repo, datafile, revisions, default_plot=False): @@ -162,13 +160,21 @@ def _load_from_revisions(repo, datafile, revisions, default_plot=False): for rev in revisions: try: - data.extend( - _load_from_revision( - repo, datafile, rev, default_plot=default_plot - ) - ) + content = _load_from_revision(repo, datafile, rev) + + tmp = parse(datafile, content, default_plot) + for data_point in tmp: + data_point["rev"] = rev + + data.extend(tmp) + except NoMetricOnRevisionError as e: exceptions.append(e) + except PlotMetricTypeError: + raise + except Exception: + logger.error("Failed to parse '{}' at '{}.'".format(datafile, rev)) + raise if not data and exceptions: raise NoMetricInHistoryError(datafile, revisions) diff --git a/tests/unit/command/test_plot.py b/tests/unit/command/test_plot.py index c8678190ad..d073ac5f8a 100644 --- a/tests/unit/command/test_plot.py +++ b/tests/unit/command/test_plot.py @@ -34,7 +34,7 @@ def test_metrics_diff(mocker): datafile="datafile", template="template", revisions=["HEAD", "tag1", "tag2"], - file="result.extension", + fname="result.extension", embed=False, ) @@ -64,7 +64,7 @@ def test_metrics_show(mocker): m.assert_called_once_with( datafile="datafile", template="template", - file="result.extension", + fname="result.extension", revisions=None, embed=False, ) From ad9c8d8041fc539fd01a4f83f2811a359ce6ed74 Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 17 Apr 2020 17:56:43 +0200 Subject: [PATCH 063/102] add columns functionality --- dvc/command/plot.py | 23 ++++++-- dvc/repo/plot.py | 93 ++++++++++++++++++++++----------- tests/func/test_plot.py | 74 ++++++++++++++++++++------ tests/unit/command/test_plot.py | 10 +++- 4 files changed, 149 insertions(+), 51 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 9fc1d8570b..ed872b117f 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -15,13 +15,18 @@ def _revisions(self): raise NotImplementedError def run(self): + + columns = None + if self.args.columns: + columns = set(self.args.columns.split(",")) try: result = self.repo.plot( datafile=self.args.datafile, template=self.args.template, revisions=self._revisions(), fname=self.args.file, - embed=not self.args.no_embed, + columns=columns, + embed=not self.args.show_json, ) except DvcException: logger.exception("") @@ -91,11 +96,17 @@ def add_parser(subparsers, parent_parser): "-f", "--file", help="Name of the generated file." ) plot_show_parser.add_argument( - "--no-embed", + "--show-json", action="store_true", default=False, help="Do not wrap vega plot json with HTML.", ) + plot_show_parser.add_argument( + "-c", + "--columns", + default=None, + help="Choose which columns to put into plot.", + ) plot_show_parser.set_defaults(func=CmdPlotShow) PLOT_DIFF_HELP = ( @@ -134,9 +145,15 @@ def add_parser(subparsers, parent_parser): ) plot_diff_parser.add_argument( - "--no-embed", + "--show-json", action="store_true", default=False, help="Do not wrap vega plot json with HTML.", ) + plot_diff_parser.add_argument( + "-c", + "--columns", + default=None, + help="Choose which columns to put into plot.", + ) plot_diff_parser.set_defaults(func=CmdPlotDiff) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 37ebb99571..8d54ba63d0 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -5,7 +5,7 @@ import os from collections import OrderedDict -from funcy import first +from funcy import first, last from ruamel import yaml from dvc.exceptions import DvcException, PathMissingError @@ -97,37 +97,35 @@ def _parse_json(fobj, default_plot): return _parse(data, default_plot) -def _parse_csv(fobj, default_plot, delimiter=","): - if default_plot: - data = [] - for index, row in enumerate(csv.reader(fobj, delimiter=delimiter)): - assert len(row) >= 1 - if index == 0 and len(row) > 1: - # skip header - continue - data.append({"y": row[-1], "x": index}) +def _parse_csv(file_content, delimiter=","): + first_row = first(csv.reader(io.StringIO(file_content))) + + if len(first_row) == 1: + reader = csv.DictReader( + io.StringIO(file_content), + delimiter=delimiter, + fieldnames=["value"], + ) else: - data = [ - row - for row in ( - csv.DictReader( - fobj, skipinitialspace=True, delimiter=delimiter - ) - ) - ] - return data + reader = csv.DictReader( + io.StringIO(file_content), + skipinitialspace=True, + delimiter=delimiter, + ) + + return [row for row in reader], reader.fieldnames def parse(datafile, content, default_plot=False): _, extension = os.path.splitext(datafile.lower()) if extension == ".json": - return _parse_json(io.StringIO(content), default_plot) + return _parse_json(io.StringIO(content), default_plot), None elif extension == ".csv": - return _parse_csv(io.StringIO(content), default_plot) + return _parse_csv(content) elif extension == ".tsv": - return _parse_csv(io.StringIO(content), default_plot, "\t") + return _parse_csv(content, "\t") elif extension == ".yaml": - return _parse_yaml(io.StringIO(content), default_plot) + return _parse_yaml(io.StringIO(content), default_plot), None raise PlotMetricTypeError(datafile) @@ -154,7 +152,9 @@ def open_datafile(): return datafile_content -def _load_from_revisions(repo, datafile, revisions, default_plot=False): +def _load_from_revisions( + repo, datafile, revisions, default_plot=False, columns=None +): data = [] exceptions = [] @@ -162,11 +162,24 @@ def _load_from_revisions(repo, datafile, revisions, default_plot=False): try: content = _load_from_revision(repo, datafile, rev) - tmp = parse(datafile, content, default_plot) - for data_point in tmp: + tmp_data, fieldnames = parse(datafile, content) + _filter_columns(tmp_data, columns) + + if default_plot: + new_tmp = [] + if fieldnames: + y = last(fieldnames) + else: + y = last(list(first(tmp_data).keys())) + + for index, data_point in enumerate(tmp_data): + new_tmp.append({"x": index, "y": data_point[y]}) + tmp_data = new_tmp + + for data_point in tmp_data: data_point["rev"] = rev - data.extend(tmp) + data.extend(tmp_data) except NoMetricOnRevisionError as e: exceptions.append(e) @@ -187,6 +200,14 @@ def _load_from_revisions(repo, datafile, revisions, default_plot=False): return data +def _filter_columns(data_points, columns): + if columns: + for data_point in data_points: + to_del = set(data_point.keys()) - columns + for key in to_del: + del data_point[key] + + def _evaluate_templatepath(repo, template=None): if not template: return repo.plot_templates.default_template @@ -197,20 +218,28 @@ def _evaluate_templatepath(repo, template=None): @locked -def fill_template(repo, datafile, template_path, revisions): +def fill_template(repo, datafile, template_path, revisions, columns=None): default_plot = template_path == repo.plot_templates.default_template template_datafiles = _parse_template(template_path, datafile) data = { - datafile: _load_from_revisions(repo, datafile, revisions, default_plot) + datafile: _load_from_revisions( + repo, datafile, revisions, default_plot, columns=columns + ) for datafile in template_datafiles } return Template.fill(template_path, data, datafile) def plot( - repo, datafile=None, template=None, revisions=None, fname=None, embed=False + repo, + datafile=None, + template=None, + revisions=None, + fname=None, + columns=None, + embed=False, ): if revisions is None: revisions = [WORKSPACE_REVISION_NAME] @@ -220,7 +249,9 @@ def plot( template_path = _evaluate_templatepath(repo, template) - plot_content = fill_template(repo, datafile, template_path, revisions) + plot_content = fill_template( + repo, datafile, template_path, revisions, columns + ) if embed: div = DIV_HTML.format(id="plot", vega_json=plot_content) diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index e8b965d882..962beb9bc5 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -86,9 +86,8 @@ def test_plot_csv_multiple_columns(tmp_dir, scm, dvc): page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( [ - # header was skipped so index starts at 1 - {"y": "2", "x": 1, "rev": "workspace"}, - {"y": "3", "x": 2, "rev": "workspace"}, + {"y": "2", "x": 0, "rev": "workspace"}, + {"y": "3", "x": 1, "rev": "workspace"}, ], sort_keys=True, ) @@ -252,17 +251,22 @@ def test_throw_on_no_metric_at_all(tmp_dir, scm, dvc, caplog): ) -def test_custom_template(tmp_dir, scm, dvc): +@pytest.fixture() +def custom_template(tmp_dir, dvc): + custom_template = tmp_dir / "custom_template.json" shutil.copy( fspath(tmp_dir / ".dvc" / "plot" / "default.json"), - fspath(tmp_dir / "newtemplate.json"), + fspath(custom_template), ) + return custom_template + +def test_custom_template(tmp_dir, scm, dvc, custom_template): metric = [{"a": 1, "b": 2}, {"a": 2, "b": 3}] _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "init", "v1") - result = dvc.plot("metric.json", "newtemplate.json", embed=True) + result = dvc.plot("metric.json", fspath(custom_template), embed=True) page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( @@ -281,22 +285,20 @@ def _replace(path, src, dst): path.write_text(path.read_text().replace(src, dst)) -def test_custom_template_with_specified_data(tmp_dir, scm, dvc): - shutil.copy( - fspath(tmp_dir / ".dvc" / "plot" / "default.json"), - fspath(tmp_dir / "newtemplate.json"), - ) +def test_custom_template_with_specified_data( + tmp_dir, scm, dvc, custom_template +): _replace( - tmp_dir / "newtemplate.json", - "DVC_METRIC_DATA", - "DVC_METRIC_DATA,metric.json", + custom_template, "DVC_METRIC_DATA", "DVC_METRIC_DATA,metric.json", ) metric = [{"a": 1, "b": 2}, {"a": 2, "b": 3}] _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "init", "v1") - result = dvc.plot(datafile=None, template="newtemplate.json", embed=True) + result = dvc.plot( + datafile=None, template=fspath(custom_template), embed=True + ) page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( @@ -385,3 +387,45 @@ def test_plot_wrong_metric_type(tmp_dir, scm, dvc): tmp_dir.scm_gen("metric.txt", "content", commit="initial") with pytest.raises(PlotMetricTypeError): dvc.plot(datafile="metric.txt") + + +def test_plot_choose_columns(tmp_dir, scm, dvc, custom_template): + metric = [{"a": 1, "b": 2, "c": 3}, {"a": 2, "b": 3, "c": 4}] + _write_json(tmp_dir, metric, "metric.json") + _run_with_metric(tmp_dir, "metric.json", "init", "v1") + + result = dvc.plot( + "metric.json", fspath(custom_template), columns={"b", "c"}, embed=True + ) + + page_content = BeautifulSoup((tmp_dir / result).read_text()) + vega_data = json.dumps( + [ + {"b": 2, "c": 3, "rev": "workspace"}, + {"b": 3, "c": 4, "rev": "workspace"}, + ], + sort_keys=True, + ) + assert _remove_whitespace(vega_data) in _remove_whitespace( + first(page_content.body.script.contents) + ) + + +def test_plot_default_choose_column(tmp_dir, scm, dvc): + metric = [{"a": 1, "b": 2, "c": 3}, {"a": 2, "b": 3, "c": 4}] + _write_json(tmp_dir, metric, "metric.json") + _run_with_metric(tmp_dir, "metric.json", "init", "v1") + + result = dvc.plot("metric.json", columns={"c"}, embed=True) + + page_content = BeautifulSoup((tmp_dir / result).read_text()) + vega_data = json.dumps( + [ + {"x": 0, "y": 3, "rev": "workspace"}, + {"x": 1, "y": 4, "rev": "workspace"}, + ], + sort_keys=True, + ) + assert _remove_whitespace(vega_data) in _remove_whitespace( + first(page_content.body.script.contents) + ) diff --git a/tests/unit/command/test_plot.py b/tests/unit/command/test_plot.py index d073ac5f8a..484993f422 100644 --- a/tests/unit/command/test_plot.py +++ b/tests/unit/command/test_plot.py @@ -15,7 +15,9 @@ def test_metrics_diff(mocker): "template", "-d", "datafile", - "--no-embed", + "--columns", + "column1,column2", + "--show-json", "HEAD", "tag1", "tag2", @@ -35,6 +37,7 @@ def test_metrics_diff(mocker): template="template", revisions=["HEAD", "tag1", "tag2"], fname="result.extension", + columns={"column1", "column2"}, embed=False, ) @@ -48,7 +51,9 @@ def test_metrics_show(mocker): "result.extension", "-t", "template", - "--no-embed", + "--columns", + "column1,column2", + "--show-json", "datafile", ] ) @@ -66,6 +71,7 @@ def test_metrics_show(mocker): template="template", fname="result.extension", revisions=None, + columns={"column1", "column2"}, embed=False, ) From 0c453aadeadf4a6d31960b282a5c63f0fc436cf1 Mon Sep 17 00:00:00 2001 From: pawel Date: Mon, 20 Apr 2020 15:10:08 +0200 Subject: [PATCH 064/102] extract default data transformation to separate method --- dvc/repo/plot.py | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 8d54ba63d0..f3305b75c3 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -4,6 +4,7 @@ import logging import os from collections import OrderedDict +from copy import copy from funcy import first, last from ruamel import yaml @@ -152,6 +153,18 @@ def open_datafile(): return datafile_content +def _transform_to_default_data(data_points, fieldnames=None): + new_data = [] + if fieldnames: + y = last(fieldnames) + else: + y = last(list(first(data_points).keys())) + + for index, data_point in enumerate(data_points): + new_data.append({"x": index, "y": data_point[y]}) + return new_data + + def _load_from_revisions( repo, datafile, revisions, default_plot=False, columns=None ): @@ -163,18 +176,10 @@ def _load_from_revisions( content = _load_from_revision(repo, datafile, rev) tmp_data, fieldnames = parse(datafile, content) - _filter_columns(tmp_data, columns) + tmp_data = _filter_columns(tmp_data, columns) if default_plot: - new_tmp = [] - if fieldnames: - y = last(fieldnames) - else: - y = last(list(first(tmp_data).keys())) - - for index, data_point in enumerate(tmp_data): - new_tmp.append({"x": index, "y": data_point[y]}) - tmp_data = new_tmp + tmp_data = _transform_to_default_data(tmp_data, fieldnames) for data_point in tmp_data: data_point["rev"] = rev @@ -201,11 +206,17 @@ def _load_from_revisions( def _filter_columns(data_points, columns): - if columns: - for data_point in data_points: - to_del = set(data_point.keys()) - columns - for key in to_del: - del data_point[key] + if not columns: + return data_points + + result = [] + for data_point in data_points: + new_dp = copy(data_point) + to_del = set(data_point.keys()) - columns + for key in to_del: + del new_dp[key] + result.append(new_dp) + return result def _evaluate_templatepath(repo, template=None): From 893939c3858e3827bda2fca8bde7fc681366e5cf Mon Sep 17 00:00:00 2001 From: pawel Date: Mon, 20 Apr 2020 18:12:47 +0200 Subject: [PATCH 065/102] plot: initial support for jsonpath --- dvc/command/plot.py | 7 +++- dvc/repo/plot.py | 77 +++++++++++++++++++++++++++++------------ tests/func/test_plot.py | 6 ++-- tests/unit/test_plot.py | 22 ++++++++++++ 4 files changed, 85 insertions(+), 27 deletions(-) create mode 100644 tests/unit/test_plot.py diff --git a/dvc/command/plot.py b/dvc/command/plot.py index ed872b117f..2e8bbbc9c1 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -17,8 +17,12 @@ def _revisions(self): def run(self): columns = None + path = None if self.args.columns: - columns = set(self.args.columns.split(",")) + if self.args.columns.startswith("$"): + path = self.args.columns + else: + columns = set(self.args.columns.split(",")) try: result = self.repo.plot( datafile=self.args.datafile, @@ -26,6 +30,7 @@ def run(self): revisions=self._revisions(), fname=self.args.file, columns=columns, + path=path, embed=not self.args.show_json, ) except DvcException: diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index f3305b75c3..3200eb2088 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -74,28 +74,53 @@ def __init__(self, file): ) -WORKSPACE_REVISION_NAME = "workspace" +class UnexpectedJsonStructureError(DvcException): + pass -def _parse(data, default_plot): - assert isinstance(data, list) - if default_plot: - assert all(len(e) >= 1 for e in data) - last_key = list(first(data).keys())[-1] - data = [{"y": d[last_key], "x": i} for i, d in enumerate(data)] - return data +class JsonParsingError(DvcException): + def __init__(self, file): + super().__init__( + "Failed to infer data structure from '{}'. Did you forget " + "to specify jsonpath?".format(file) + ) -def _parse_yaml(fobj, default_plot): - data = yaml.load(fobj) +WORKSPACE_REVISION_NAME = "workspace" - return _parse(data, default_plot) +def _parse_yaml(content): + return yaml.load(content) + + +def _parse_json(content, path=None): + import jsonpath_ng + + result = json.loads(content, object_pairs_hook=OrderedDict) + + if path: + found = jsonpath_ng.parse(path).find(result) + first_datum = first(found) + if ( + len(found) == 1 + and isinstance(first_datum.value, list) + and isinstance(first(first_datum.value), dict) + ): + # list of dicts + result = first_datum.value + elif len(first_datum.path.fields) == 1: + # list of values + field_name = first(first_datum.path.fields) + result = [{field_name: datum.value} for datum in found] + else: + raise DvcException( + "Could not parse data for path '{}'".format(path) + ) -def _parse_json(fobj, default_plot): - data = json.load(fobj, object_pairs_hook=OrderedDict) + if not isinstance(result, list) or not (isinstance(first(result), dict)): + raise UnexpectedJsonStructureError("Unable to parse") - return _parse(data, default_plot) + return result def _parse_csv(file_content, delimiter=","): @@ -117,16 +142,16 @@ def _parse_csv(file_content, delimiter=","): return [row for row in reader], reader.fieldnames -def parse(datafile, content, default_plot=False): +def parse(datafile, content, path): _, extension = os.path.splitext(datafile.lower()) if extension == ".json": - return _parse_json(io.StringIO(content), default_plot), None + return _parse_json(content, path), None elif extension == ".csv": return _parse_csv(content) elif extension == ".tsv": return _parse_csv(content, "\t") elif extension == ".yaml": - return _parse_yaml(io.StringIO(content), default_plot), None + return _parse_yaml(io.StringIO(content)), None raise PlotMetricTypeError(datafile) @@ -166,7 +191,7 @@ def _transform_to_default_data(data_points, fieldnames=None): def _load_from_revisions( - repo, datafile, revisions, default_plot=False, columns=None + repo, datafile, revisions, default_plot=False, columns=None, path=None ): data = [] exceptions = [] @@ -175,7 +200,8 @@ def _load_from_revisions( try: content = _load_from_revision(repo, datafile, rev) - tmp_data, fieldnames = parse(datafile, content) + tmp_data, fieldnames = parse(datafile, content, path) + tmp_data = _filter_columns(tmp_data, columns) if default_plot: @@ -190,8 +216,10 @@ def _load_from_revisions( exceptions.append(e) except PlotMetricTypeError: raise + except UnexpectedJsonStructureError: + raise JsonParsingError(datafile) except Exception: - logger.error("Failed to parse '{}' at '{}.'".format(datafile, rev)) + logger.error("Failed to parse '{}' at '{}'.".format(datafile, rev)) raise if not data and exceptions: @@ -229,14 +257,16 @@ def _evaluate_templatepath(repo, template=None): @locked -def fill_template(repo, datafile, template_path, revisions, columns=None): +def fill_template( + repo, datafile, template_path, revisions, columns=None, path=None +): default_plot = template_path == repo.plot_templates.default_template template_datafiles = _parse_template(template_path, datafile) data = { datafile: _load_from_revisions( - repo, datafile, revisions, default_plot, columns=columns + repo, datafile, revisions, default_plot, columns=columns, path=path ) for datafile in template_datafiles } @@ -250,6 +280,7 @@ def plot( revisions=None, fname=None, columns=None, + path=None, embed=False, ): if revisions is None: @@ -261,7 +292,7 @@ def plot( template_path = _evaluate_templatepath(repo, template) plot_content = fill_template( - repo, datafile, template_path, revisions, columns + repo, datafile, template_path, revisions, columns, path ) if embed: diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 962beb9bc5..1aa23d6c47 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -416,13 +416,13 @@ def test_plot_default_choose_column(tmp_dir, scm, dvc): _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "init", "v1") - result = dvc.plot("metric.json", columns={"c"}, embed=True) + result = dvc.plot("metric.json", columns={"b"}, embed=True) page_content = BeautifulSoup((tmp_dir / result).read_text()) vega_data = json.dumps( [ - {"x": 0, "y": 3, "rev": "workspace"}, - {"x": 1, "y": 4, "rev": "workspace"}, + {"x": 0, "y": 2, "rev": "workspace"}, + {"x": 1, "y": 3, "rev": "workspace"}, ], sort_keys=True, ) diff --git a/tests/unit/test_plot.py b/tests/unit/test_plot.py new file mode 100644 index 0000000000..c34d4e48fb --- /dev/null +++ b/tests/unit/test_plot.py @@ -0,0 +1,22 @@ +import json + +import pytest + +from dvc.repo.plot import _parse_json + + +@pytest.mark.parametrize( + "path,expected_result", + [ + ("$.some.path[*].a", [{"a": 1}, {"a": 4}]), + ("$.some.path", [{"a": 1, "b": 2, "c": 3}, {"a": 4, "b": 5, "c": 6}]), + ], +) +def test_parse_json(path, expected_result): + value = { + "some": {"path": [{"a": 1, "b": 2, "c": 3}, {"a": 4, "b": 5, "c": 6}]} + } + + result = _parse_json(json.dumps(value), path) + + assert result == expected_result From 0a5b945aa86a94f380723abf02cca071beb199ec Mon Sep 17 00:00:00 2001 From: pawel Date: Tue, 21 Apr 2020 15:35:23 +0200 Subject: [PATCH 066/102] plot: rename columns to filters, tests are dict based --- dvc/command/plot.py | 22 ++- dvc/repo/plot.py | 18 +-- tests/func/test_plot.py | 250 ++++++++++++++------------------ tests/unit/command/test_plot.py | 12 +- 4 files changed, 131 insertions(+), 171 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 2e8bbbc9c1..d07a03f00f 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -16,20 +16,20 @@ def _revisions(self): def run(self): - columns = None + fields = None path = None - if self.args.columns: - if self.args.columns.startswith("$"): - path = self.args.columns + if self.args.filter: + if self.args.filter.startswith("$"): + path = self.args.filter else: - columns = set(self.args.columns.split(",")) + fields = set(self.args.filter.split(",")) try: result = self.repo.plot( datafile=self.args.datafile, template=self.args.template, revisions=self._revisions(), fname=self.args.file, - columns=columns, + fields=fields, path=path, embed=not self.args.show_json, ) @@ -107,10 +107,9 @@ def add_parser(subparsers, parent_parser): help="Do not wrap vega plot json with HTML.", ) plot_show_parser.add_argument( - "-c", - "--columns", + "--filter", default=None, - help="Choose which columns to put into plot.", + help="Choose which fileds or path to put into plot.", ) plot_show_parser.set_defaults(func=CmdPlotShow) @@ -156,9 +155,8 @@ def add_parser(subparsers, parent_parser): help="Do not wrap vega plot json with HTML.", ) plot_diff_parser.add_argument( - "-c", - "--columns", + "--filter", default=None, - help="Choose which columns to put into plot.", + help="Choose which fileds or path to put into plot.", ) plot_diff_parser.set_defaults(func=CmdPlotDiff) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 3200eb2088..e9db089b93 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -191,7 +191,7 @@ def _transform_to_default_data(data_points, fieldnames=None): def _load_from_revisions( - repo, datafile, revisions, default_plot=False, columns=None, path=None + repo, datafile, revisions, default_plot=False, fields=None, path=None ): data = [] exceptions = [] @@ -202,7 +202,7 @@ def _load_from_revisions( tmp_data, fieldnames = parse(datafile, content, path) - tmp_data = _filter_columns(tmp_data, columns) + tmp_data = _filter_fields(tmp_data, fields) if default_plot: tmp_data = _transform_to_default_data(tmp_data, fieldnames) @@ -233,14 +233,14 @@ def _load_from_revisions( return data -def _filter_columns(data_points, columns): - if not columns: +def _filter_fields(data_points, fields): + if not fields: return data_points result = [] for data_point in data_points: new_dp = copy(data_point) - to_del = set(data_point.keys()) - columns + to_del = set(data_point.keys()) - fields for key in to_del: del new_dp[key] result.append(new_dp) @@ -258,7 +258,7 @@ def _evaluate_templatepath(repo, template=None): @locked def fill_template( - repo, datafile, template_path, revisions, columns=None, path=None + repo, datafile, template_path, revisions, fields=None, path=None ): default_plot = template_path == repo.plot_templates.default_template @@ -266,7 +266,7 @@ def fill_template( data = { datafile: _load_from_revisions( - repo, datafile, revisions, default_plot, columns=columns, path=path + repo, datafile, revisions, default_plot, fields=fields, path=path ) for datafile in template_datafiles } @@ -279,7 +279,7 @@ def plot( template=None, revisions=None, fname=None, - columns=None, + fields=None, path=None, embed=False, ): @@ -292,7 +292,7 @@ def plot( template_path = _evaluate_templatepath(repo, template) plot_content = fill_template( - repo, datafile, template_path, revisions, columns, path + repo, datafile, template_path, revisions, fields, path ) if embed: diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 1aa23d6c47..6144498240 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -59,19 +59,13 @@ def test_plot_csv_one_column(tmp_dir, scm, dvc): _write_csv(metric, "metric.csv") _run_with_metric(tmp_dir, metric_filename="metric.csv") - result = dvc.plot("metric.csv", embed=True) + result = dvc.plot("metric.csv") - page_content = BeautifulSoup((tmp_dir / result).read_text()) - vega_data = json.dumps( - [ - {"y": "2", "x": 0, "rev": "workspace"}, - {"y": "3", "x": 1, "rev": "workspace"}, - ], - sort_keys=True, - ) - assert _remove_whitespace(vega_data) in _remove_whitespace( - first(page_content.body.script.contents) - ) + plot_json = json.loads((tmp_dir / result).read_text()) + assert plot_json["data"]["values"] == [ + {"y": "2", "x": 0, "rev": "workspace"}, + {"y": "3", "x": 1, "rev": "workspace"}, + ] def test_plot_csv_multiple_columns(tmp_dir, scm, dvc): @@ -82,18 +76,12 @@ def test_plot_csv_multiple_columns(tmp_dir, scm, dvc): _write_csv(metric, "metric.csv") _run_with_metric(tmp_dir, metric_filename="metric.csv") - result = dvc.plot("metric.csv", embed=True) - page_content = BeautifulSoup((tmp_dir / result).read_text()) - vega_data = json.dumps( - [ - {"y": "2", "x": 0, "rev": "workspace"}, - {"y": "3", "x": 1, "rev": "workspace"}, - ], - sort_keys=True, - ) - assert _remove_whitespace(vega_data) in _remove_whitespace( - first(page_content.body.script.contents) - ) + result = dvc.plot("metric.csv") + plot_json = json.loads((tmp_dir / result).read_text()) + assert plot_json["data"]["values"] == [ + {"y": "2", "x": 0, "rev": "workspace"}, + {"y": "3", "x": 1, "rev": "workspace"}, + ] def test_plot_json_single_val(tmp_dir, scm, dvc): @@ -101,19 +89,13 @@ def test_plot_json_single_val(tmp_dir, scm, dvc): _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "first run") - result = dvc.plot("metric.json", embed=True) + result = dvc.plot("metric.json", fname="result.json") - page_content = BeautifulSoup((tmp_dir / result).read_text()) - vega_data = json.dumps( - [ - {"y": 2, "x": 0, "rev": "workspace"}, - {"y": 3, "x": 1, "rev": "workspace"}, - ], - sort_keys=True, - ) - assert _remove_whitespace(vega_data) in _remove_whitespace( - first(page_content.body.script.contents) - ) + plot_json = json.loads((tmp_dir / result).read_text()) + assert plot_json["data"]["values"] == [ + {"y": 2, "x": 0, "rev": "workspace"}, + {"y": 3, "x": 1, "rev": "workspace"}, + ] def test_plot_json_multiple_val(tmp_dir, scm, dvc): @@ -124,19 +106,13 @@ def test_plot_json_multiple_val(tmp_dir, scm, dvc): _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "first run") - result = dvc.plot("metric.json", embed=True) + result = dvc.plot("metric.json", fname="result.json") - page_content = BeautifulSoup((tmp_dir / result).read_text()) - vega_data = json.dumps( - [ - {"y": 2, "x": 0, "rev": "workspace"}, - {"y": 3, "x": 1, "rev": "workspace"}, - ], - sort_keys=True, - ) - assert _remove_whitespace(vega_data) in _remove_whitespace( - first(page_content.body.script.contents) - ) + plot_json = json.loads((tmp_dir / result).read_text()) + assert plot_json["data"]["values"] == [ + {"y": 2, "x": 0, "rev": "workspace"}, + {"y": 3, "x": 1, "rev": "workspace"}, + ] def test_plot_confusion(tmp_dir, dvc): @@ -147,20 +123,16 @@ def test_plot_confusion(tmp_dir, dvc): _write_json(tmp_dir, confusion_matrix, "metric.json") _run_with_metric(tmp_dir, "metric.json", "first run") - result = dvc.plot(datafile="metric.json", template="confusion", embed=True) - - page_content = BeautifulSoup((tmp_dir / result).read_text()) - vega_data = json.dumps( - [ - {"predicted": "B", "actual": "A", "rev": "workspace"}, - {"predicted": "A", "actual": "A", "rev": "workspace"}, - ], - sort_keys=True, - ) - assert _remove_whitespace(vega_data) in _remove_whitespace( - first(page_content.body.script.contents) + result = dvc.plot( + datafile="metric.json", template="confusion", fname="result.json" ) + plot_json = json.loads((tmp_dir / result).read_text()) + assert plot_json["data"]["values"] == [ + {"predicted": "B", "actual": "A", "rev": "workspace"}, + {"predicted": "A", "actual": "A", "rev": "workspace"}, + ] + def test_plot_multiple_revs(tmp_dir, scm, dvc): shutil.copy( @@ -183,25 +155,18 @@ def test_plot_multiple_revs(tmp_dir, scm, dvc): "metric.json", template="template.json", revisions=["HEAD", "v2", "v1"], - fname="result.html", - embed=True, + fname="result.json", ) - page_content = BeautifulSoup((tmp_dir / result).read_text()) - vega_data = json.dumps( - [ - {"y": 5, "x": 1, "rev": "HEAD"}, - {"y": 6, "x": 2, "rev": "HEAD"}, - {"y": 3, "x": 1, "rev": "v2"}, - {"y": 5, "x": 2, "rev": "v2"}, - {"y": 2, "x": 1, "rev": "v1"}, - {"y": 3, "x": 2, "rev": "v1"}, - ], - sort_keys=True, - ) - assert _remove_whitespace(vega_data) in _remove_whitespace( - first(page_content.body.script.contents) - ) + plot_json = json.loads((tmp_dir / result).read_text()) + assert plot_json["data"]["values"] == [ + {"y": 5, "x": 1, "rev": "HEAD"}, + {"y": 6, "x": 2, "rev": "HEAD"}, + {"y": 3, "x": 1, "rev": "v2"}, + {"y": 5, "x": 2, "rev": "v2"}, + {"y": 2, "x": 1, "rev": "v1"}, + {"y": 3, "x": 2, "rev": "v1"}, + ] def test_plot_even_if_metric_missing(tmp_dir, scm, dvc, caplog): @@ -214,20 +179,19 @@ def test_plot_even_if_metric_missing(tmp_dir, scm, dvc, caplog): caplog.clear() with caplog.at_level(logging.WARNING, "dvc"): - result = dvc.plot("metric.json", revisions=["v1", "v2"], embed=True) + result = dvc.plot( + "metric.json", revisions=["v1", "v2"], fname="result.json" + ) assert ( "File 'metric.json' was not found at: 'v1'. " "It will not be plotted." in caplog.text ) - page_content = BeautifulSoup((tmp_dir / result).read_text()) - vega_data = json.dumps( - [{"y": 2, "x": 0, "rev": "v2"}, {"y": 3, "x": 1, "rev": "v2"}], - sort_keys=True, - ) - assert _remove_whitespace(vega_data) in _remove_whitespace( - first(page_content.body.script.contents) - ) + plot_json = json.loads((tmp_dir / result).read_text()) + assert plot_json["data"]["values"] == [ + {"y": 2, "x": 0, "rev": "v2"}, + {"y": 3, "x": 1, "rev": "v2"}, + ] def test_throw_on_no_metric_at_all(tmp_dir, scm, dvc, caplog): @@ -240,7 +204,7 @@ def test_throw_on_no_metric_at_all(tmp_dir, scm, dvc, caplog): with pytest.raises(NoMetricInHistoryError) as error, caplog.at_level( logging.WARNING, "dvc" ): - dvc.plot("metric.json", revisions=["v1"], embed=True) + dvc.plot("metric.json", revisions=["v1"]) # do not warn if none found assert len(caplog.messages) == 0 @@ -266,20 +230,16 @@ def test_custom_template(tmp_dir, scm, dvc, custom_template): _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "init", "v1") - result = dvc.plot("metric.json", fspath(custom_template), embed=True) - - page_content = BeautifulSoup((tmp_dir / result).read_text()) - vega_data = json.dumps( - [ - {"a": 1, "b": 2, "rev": "workspace"}, - {"a": 2, "b": 3, "rev": "workspace"}, - ], - sort_keys=True, - ) - assert _remove_whitespace(vega_data) in _remove_whitespace( - first(page_content.body.script.contents) + result = dvc.plot( + "metric.json", fspath(custom_template), fname="result.json" ) + plot_json = json.loads((tmp_dir / result).read_text()) + assert plot_json["data"]["values"] == [ + {"a": 1, "b": 2, "rev": "workspace"}, + {"a": 2, "b": 3, "rev": "workspace"}, + ] + def _replace(path, src, dst): path.write_text(path.read_text().replace(src, dst)) @@ -297,20 +257,14 @@ def test_custom_template_with_specified_data( _run_with_metric(tmp_dir, "metric.json", "init", "v1") result = dvc.plot( - datafile=None, template=fspath(custom_template), embed=True + datafile=None, template=fspath(custom_template), fname="result.json" ) - page_content = BeautifulSoup((tmp_dir / result).read_text()) - vega_data = json.dumps( - [ - {"a": 1, "b": 2, "rev": "workspace"}, - {"a": 2, "b": 3, "rev": "workspace"}, - ], - sort_keys=True, - ) - assert _remove_whitespace(vega_data) in _remove_whitespace( - first(page_content.body.script.contents) - ) + plot_json = json.loads((tmp_dir / result).read_text()) + assert plot_json["data"]["values"] == [ + {"a": 1, "b": 2, "rev": "workspace"}, + {"a": 2, "b": 3, "rev": "workspace"}, + ] def test_plot_override_specified_data_source(tmp_dir, scm, dvc): @@ -329,20 +283,16 @@ def test_plot_override_specified_data_source(tmp_dir, scm, dvc): _run_with_metric(tmp_dir, "metric2.json", "init", "v1") result = dvc.plot( - datafile="metric2.json", template="newtemplate.json", embed=True + datafile="metric2.json", + template="newtemplate.json", + fname="result.json", ) - page_content = BeautifulSoup((tmp_dir / result).read_text()) - vega_data = json.dumps( - [ - {"a": 1, "b": 2, "rev": "workspace"}, - {"a": 2, "b": 3, "rev": "workspace"}, - ], - sort_keys=True, - ) - assert _remove_whitespace(vega_data) in _remove_whitespace( - first(page_content.body.script.contents) - ) + plot_json = json.loads((tmp_dir / result).read_text()) + assert plot_json["data"]["values"] == [ + {"a": 1, "b": 2, "rev": "workspace"}, + {"a": 2, "b": 3, "rev": "workspace"}, + ] def test_should_embed_vega_json_template(tmp_dir, scm, dvc): @@ -359,13 +309,11 @@ def test_should_embed_vega_json_template(tmp_dir, scm, dvc): "metric.json", "template.json", fname="result.json", embed=False ) - result_content = json.loads((tmp_dir / result).read_text()) - vega_data = [ + plot_json = json.loads((tmp_dir / result).read_text()) + assert [ {"x": 1, "y": 2, "rev": "workspace"}, {"x": 2, "y": 3, "rev": "workspace"}, - ] - - assert vega_data == result_content["data"]["values"] + ] == plot_json["data"]["values"] def test_should_raise_on_no_template_and_datafile(tmp_dir, dvc): @@ -395,20 +343,17 @@ def test_plot_choose_columns(tmp_dir, scm, dvc, custom_template): _run_with_metric(tmp_dir, "metric.json", "init", "v1") result = dvc.plot( - "metric.json", fspath(custom_template), columns={"b", "c"}, embed=True + "metric.json", + fspath(custom_template), + fields={"b", "c"}, + fname="result.json", ) - page_content = BeautifulSoup((tmp_dir / result).read_text()) - vega_data = json.dumps( - [ - {"b": 2, "c": 3, "rev": "workspace"}, - {"b": 3, "c": 4, "rev": "workspace"}, - ], - sort_keys=True, - ) - assert _remove_whitespace(vega_data) in _remove_whitespace( - first(page_content.body.script.contents) - ) + plot_json = json.loads((tmp_dir / result).read_text()) + assert plot_json["data"]["values"] == [ + {"b": 2, "c": 3, "rev": "workspace"}, + {"b": 3, "c": 4, "rev": "workspace"}, + ] def test_plot_default_choose_column(tmp_dir, scm, dvc): @@ -416,16 +361,31 @@ def test_plot_default_choose_column(tmp_dir, scm, dvc): _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "init", "v1") - result = dvc.plot("metric.json", columns={"b"}, embed=True) + result = dvc.plot("metric.json", fields={"b"}, fname="result.json") + + plot_json = json.loads((tmp_dir / result).read_text()) + assert plot_json["data"]["values"] == [ + {"x": 0, "y": 2, "rev": "workspace"}, + {"x": 1, "y": 3, "rev": "workspace"}, + ] + + +def test_plot_embed(tmp_dir, scm, dvc): + metric = [{"val": 2}, {"val": 3}] + _write_json(tmp_dir, metric, "metric.json") + _run_with_metric(tmp_dir, "metric.json", "first run") + + result = dvc.plot("metric.json", fname="result.json", embed=True) page_content = BeautifulSoup((tmp_dir / result).read_text()) - vega_data = json.dumps( + data_dump = json.dumps( [ - {"x": 0, "y": 2, "rev": "workspace"}, - {"x": 1, "y": 3, "rev": "workspace"}, + {"y": 2, "x": 0, "rev": "workspace"}, + {"y": 3, "x": 1, "rev": "workspace"}, ], sort_keys=True, ) - assert _remove_whitespace(vega_data) in _remove_whitespace( + + assert _remove_whitespace(data_dump) in _remove_whitespace( first(page_content.body.script.contents) ) diff --git a/tests/unit/command/test_plot.py b/tests/unit/command/test_plot.py index 484993f422..cf3411f205 100644 --- a/tests/unit/command/test_plot.py +++ b/tests/unit/command/test_plot.py @@ -15,7 +15,7 @@ def test_metrics_diff(mocker): "template", "-d", "datafile", - "--columns", + "--filter", "column1,column2", "--show-json", "HEAD", @@ -37,7 +37,8 @@ def test_metrics_diff(mocker): template="template", revisions=["HEAD", "tag1", "tag2"], fname="result.extension", - columns={"column1", "column2"}, + fields={"column1", "column2"}, + path=None, embed=False, ) @@ -51,8 +52,8 @@ def test_metrics_show(mocker): "result.extension", "-t", "template", - "--columns", - "column1,column2", + "--filter", + "$.data", "--show-json", "datafile", ] @@ -71,7 +72,8 @@ def test_metrics_show(mocker): template="template", fname="result.extension", revisions=None, - columns={"column1", "column2"}, + fields=None, + path="$.data", embed=False, ) From c235d005ef2250c1fa0e7a77e81f862dcee67d86 Mon Sep 17 00:00:00 2001 From: pawel Date: Tue, 21 Apr 2020 16:46:13 +0200 Subject: [PATCH 067/102] plot: fixups --- dvc/repo/plot.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index e9db089b93..bfeee514cd 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -201,8 +201,7 @@ def _load_from_revisions( content = _load_from_revision(repo, datafile, rev) tmp_data, fieldnames = parse(datafile, content, path) - - tmp_data = _filter_fields(tmp_data, fields) + tmp_data, fieldnames = _filter_fields(tmp_data, fieldnames, fields) if default_plot: tmp_data = _transform_to_default_data(tmp_data, fieldnames) @@ -233,18 +232,21 @@ def _load_from_revisions( return data -def _filter_fields(data_points, fields): +def _filter_fields(data_points, fieldnames=None, fields=None): if not fields: - return data_points + return data_points, fieldnames + new_fieldnames = copy(fieldnames) result = [] for data_point in data_points: new_dp = copy(data_point) to_del = set(data_point.keys()) - fields for key in to_del: del new_dp[key] + if fieldnames and key in new_fieldnames: + new_fieldnames.remove(key) result.append(new_dp) - return result + return result, new_fieldnames def _evaluate_templatepath(repo, template=None): From 7120764629c50edbb2e3e2c2254b51dfe42c67d4 Mon Sep 17 00:00:00 2001 From: pawel Date: Wed, 22 Apr 2020 15:42:50 +0200 Subject: [PATCH 068/102] plot: refactoring --- dvc/repo/plot.py | 252 ++++++++++++++++++++++++++++------------------- 1 file changed, 148 insertions(+), 104 deletions(-) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index bfeee514cd..b92eab35a9 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -1,12 +1,13 @@ import csv import io +import itertools import json import logging import os from collections import OrderedDict from copy import copy -from funcy import first, last +from funcy import first, last, cached_property from ruamel import yaml from dvc.exceptions import DvcException, PathMissingError @@ -89,70 +90,148 @@ def __init__(self, file): WORKSPACE_REVISION_NAME = "workspace" -def _parse_yaml(content): - return yaml.load(content) +def plot_data(filename, revision, content): + _, extension = os.path.splitext(filename.lower()) + if extension == ".json": + return JSONPlotData(filename, revision, content) + elif extension == ".csv": + return CSVPlotData(filename, revision, content) + elif extension == ".tsv": + return CSVPlotData(filename, revision, content, delimiter="\t") + elif extension == ".yaml": + return YAMLPLotData(filename, revision, content) + raise PlotMetricTypeError(filename) -def _parse_json(content, path=None): +def _filter_fields(data_points, fieldnames=None, fields=None, **kwargs): + if not fields: + return data_points, fieldnames + assert isinstance(fields, set) + + new_data = [] + for data_point in data_points: + new_dp = copy(data_point) + to_del = set(data_point.keys()) - fields + for key in to_del: + del new_dp[key] + if fieldnames and key in fieldnames: + fieldnames.remove(key) + new_data.append(new_dp) + return new_data, fieldnames + + +def _transform_to_default_data( + data_points, fieldnames=None, default_plot=False, **kwargs +): + if not default_plot: + return data_points, fieldnames + + new_data = [] + if fieldnames: + y = last(fieldnames) + else: + y = last(list(first(data_points).keys())) + + for index, data_point in enumerate(data_points): + new_data.append({"x": index, "y": data_point[y]}) + return new_data, ["x", "y"] + + +def _apply_path(data, fieldnames=None, path=None, **kwargs): + if not path: + return data, fieldnames + import jsonpath_ng - result = json.loads(content, object_pairs_hook=OrderedDict) - - if path: - found = jsonpath_ng.parse(path).find(result) - first_datum = first(found) - if ( - len(found) == 1 - and isinstance(first_datum.value, list) - and isinstance(first(first_datum.value), dict) - ): - # list of dicts - result = first_datum.value - elif len(first_datum.path.fields) == 1: - # list of values - field_name = first(first_datum.path.fields) - result = [{field_name: datum.value} for datum in found] - else: - raise DvcException( - "Could not parse data for path '{}'".format(path) - ) + found = jsonpath_ng.parse(path).find(data) + first_datum = first(found) + if ( + len(found) == 1 + and isinstance(first_datum.value, list) + and isinstance(first(first_datum.value), dict) + ): + data_points = first_datum.value + fieldnames = list(first(data_points).keys()) + elif len(first_datum.path.fields) == 1: + field_name = first(first_datum.path.fields) + data_points = [{field_name: datum.value} for datum in found] + else: + raise DvcException("Could not parse data for path '{}'".format(path)) - if not isinstance(result, list) or not (isinstance(first(result), dict)): + if not isinstance(data_points, list) or not ( + isinstance(first(data_points), dict) + ): raise UnexpectedJsonStructureError("Unable to parse") - return result + return data_points, fieldnames -def _parse_csv(file_content, delimiter=","): - first_row = first(csv.reader(io.StringIO(file_content))) +class PlotData: + def __init__(self, filename, revision, content, **kwargs): + self.filename = filename + self.revision = revision + self.content = content + self.fieldnames = None - if len(first_row) == 1: - reader = csv.DictReader( - io.StringIO(file_content), - delimiter=delimiter, - fieldnames=["value"], - ) - else: - reader = csv.DictReader( - io.StringIO(file_content), - skipinitialspace=True, - delimiter=delimiter, - ) + @property + def raw(self): + raise NotImplementedError - return [row for row in reader], reader.fieldnames + def _processors(self): + return [_filter_fields, _transform_to_default_data] + def to_datapoints(self, **kwargs): + data = self.raw + fieldnames = self.fieldnames -def parse(datafile, content, path): - _, extension = os.path.splitext(datafile.lower()) - if extension == ".json": - return _parse_json(content, path), None - elif extension == ".csv": - return _parse_csv(content) - elif extension == ".tsv": - return _parse_csv(content, "\t") - elif extension == ".yaml": - return _parse_yaml(io.StringIO(content)), None - raise PlotMetricTypeError(datafile) + for data_proc in self._processors(): + data, fieldnames = data_proc(data, fieldnames, **kwargs) + + for data_point in data: + data_point["rev"] = self.revision + return data + + +class JSONPlotData(PlotData): + @cached_property + def raw(self): + return json.loads(self.content, object_pairs_hook=OrderedDict) + + def _processors(self): + parent_processors = super(JSONPlotData, self)._processors() + return [_apply_path] + parent_processors + + +class CSVPlotData(PlotData): + def __init__(self, filename, revision, content, delimiter=","): + super(CSVPlotData, self).__init__(filename, revision, content) + self.delimiter = delimiter + + @cached_property + def raw(self): + first_row = first(csv.reader(io.StringIO(self.content))) + + if len(first_row) == 1: + reader = csv.DictReader( + io.StringIO(self.content), + delimiter=self.delimiter, + fieldnames=["value"], + ) + else: + reader = csv.DictReader( + io.StringIO(self.content), + skipinitialspace=True, + delimiter=self.delimiter, + ) + + self.fieldnames = reader.fieldnames + return [row for row in reader] + + +class YAMLPLotData(PlotData): + @cached_property + def raw(self): + return yaml.parse(io.StringIO(self.content)) def _load_from_revision(repo, datafile, revision): @@ -175,42 +254,16 @@ def open_datafile(): except (FileNotFoundError, PathMissingError): raise NoMetricOnRevisionError(datafile, revision) - return datafile_content - + return plot_data(datafile, revision, datafile_content) -def _transform_to_default_data(data_points, fieldnames=None): - new_data = [] - if fieldnames: - y = last(fieldnames) - else: - y = last(list(first(data_points).keys())) - for index, data_point in enumerate(data_points): - new_data.append({"x": index, "y": data_point[y]}) - return new_data - - -def _load_from_revisions( - repo, datafile, revisions, default_plot=False, fields=None, path=None -): +def _load_from_revisions(repo, datafile, revisions): data = [] exceptions = [] for rev in revisions: try: - content = _load_from_revision(repo, datafile, rev) - - tmp_data, fieldnames = parse(datafile, content, path) - tmp_data, fieldnames = _filter_fields(tmp_data, fieldnames, fields) - - if default_plot: - tmp_data = _transform_to_default_data(tmp_data, fieldnames) - - for data_point in tmp_data: - data_point["rev"] = rev - - data.extend(tmp_data) - + data.append(_load_from_revision(repo, datafile, rev)) except NoMetricOnRevisionError as e: exceptions.append(e) except PlotMetricTypeError: @@ -232,23 +285,6 @@ def _load_from_revisions( return data -def _filter_fields(data_points, fieldnames=None, fields=None): - if not fields: - return data_points, fieldnames - - new_fieldnames = copy(fieldnames) - result = [] - for data_point in data_points: - new_dp = copy(data_point) - to_del = set(data_point.keys()) - fields - for key in to_del: - del new_dp[key] - if fieldnames and key in new_fieldnames: - new_fieldnames.remove(key) - result.append(new_dp) - return result, new_fieldnames - - def _evaluate_templatepath(repo, template=None): if not template: return repo.plot_templates.default_template @@ -266,13 +302,21 @@ def fill_template( template_datafiles = _parse_template(template_path, datafile) - data = { - datafile: _load_from_revisions( - repo, datafile, revisions, default_plot, fields=fields, path=path + template_data = {} + for datafile in template_datafiles: + plot_datas = _load_from_revisions(repo, datafile, revisions) + template_data[datafile] = list( + itertools.chain.from_iterable( + [ + pd.to_datapoints( + fields=fields, default_plot=default_plot, path=path + ) + for pd in plot_datas + ] + ) ) - for datafile in template_datafiles - } - return Template.fill(template_path, data, datafile) + + return Template.fill(template_path, template_data, datafile) def plot( From 6fcd98f6d2c0bd747000eac210790ab9b3fff453 Mon Sep 17 00:00:00 2001 From: pawel Date: Wed, 22 Apr 2020 16:05:32 +0200 Subject: [PATCH 069/102] repo: plot: convert to package --- dvc/command/plot.py | 2 +- dvc/repo/plot/__init__.py | 144 +++++++++++++++++++++++++ dvc/repo/{plot.py => plot/data.py} | 167 +++-------------------------- tests/func/test_plot.py | 7 +- tests/unit/test_plot.py | 6 +- 5 files changed, 166 insertions(+), 160 deletions(-) create mode 100644 dvc/repo/plot/__init__.py rename dvc/repo/{plot.py => plot/data.py} (66%) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index d07a03f00f..3480cd24bb 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -4,7 +4,7 @@ from dvc.command.base import append_doc_link, CmdBase, fix_subparsers from dvc.exceptions import DvcException -from dvc.repo.plot import WORKSPACE_REVISION_NAME +from dvc.repo.plot.data import WORKSPACE_REVISION_NAME from dvc.utils import format_link logger = logging.getLogger(__name__) diff --git a/dvc/repo/plot/__init__.py b/dvc/repo/plot/__init__.py new file mode 100644 index 0000000000..8c06804a3e --- /dev/null +++ b/dvc/repo/plot/__init__.py @@ -0,0 +1,144 @@ +import itertools +import logging +import os + + +from dvc.exceptions import DvcException +from dvc.template import Template +from dvc.repo import locked + +logger = logging.getLogger(__name__) + +PAGE_HTML = """ + + dvc plot + + + + + + {divs} + +""" + +DIV_HTML = """
+""" + + +class TooManyDataSourcesError(DvcException): + def __init__(self, datafile, template_datafiles): + super().__init__( + "Unable to reason which of possible data sources: '{}' " + "should be replaced with '{}'".format( + ", ".join(template_datafiles), datafile + ) + ) + + +class NoDataOrTemplateProvided(DvcException): + def __init__(self): + super().__init__("Datafile or template is not specified.") + + +def _evaluate_templatepath(repo, template=None): + if not template: + return repo.plot_templates.default_template + + if os.path.exists(template): + return template + return repo.plot_templates.get_template(template) + + +@locked +def fill_template( + repo, datafile, template_path, revisions, fields=None, path=None +): + default_plot = template_path == repo.plot_templates.default_template + + template_datafiles = _parse_template(template_path, datafile) + + template_data = {} + for datafile in template_datafiles: + from dvc.repo.plot.data import _load_from_revisions + + plot_datas = _load_from_revisions(repo, datafile, revisions) + template_data[datafile] = list( + itertools.chain.from_iterable( + [ + pd.to_datapoints( + fields=fields, default_plot=default_plot, path=path + ) + for pd in plot_datas + ] + ) + ) + + return Template.fill(template_path, template_data, datafile) + + +def plot( + repo, + datafile=None, + template=None, + revisions=None, + fname=None, + fields=None, + path=None, + embed=False, +): + if revisions is None: + from dvc.repo.plot.data import WORKSPACE_REVISION_NAME + + revisions = [WORKSPACE_REVISION_NAME] + + if not datafile and not template: + raise NoDataOrTemplateProvided() + + template_path = _evaluate_templatepath(repo, template) + + plot_content = fill_template( + repo, datafile, template_path, revisions, fields, path + ) + + if embed: + div = DIV_HTML.format(id="plot", vega_json=plot_content) + plot_content = PAGE_HTML.format(divs=div) + + if not fname: + fname = _infer_result_file(datafile, template_path, embed) + + with open(fname, "w") as fobj: + fobj.write(plot_content) + return fname + + +def _infer_result_file(datafile, template_path, embed): + if datafile: + tmp = datafile + else: + tmp = "plot" + + if not embed: + extension = os.path.splitext(template_path)[1] + else: + extension = ".html" + + result_file = os.path.splitext(tmp)[0] + extension + + if result_file == datafile or result_file == template_path: + raise DvcException( + "Could not infer plot name, please provide it " "with -f option." + ) + return result_file + + +def _parse_template(template_path, datafile): + template_datafiles = Template.parse_data_placeholders(template_path) + if datafile: + if len(template_datafiles) > 1: + raise TooManyDataSourcesError(datafile, template_datafiles) + template_datafiles = {datafile} + return template_datafiles diff --git a/dvc/repo/plot.py b/dvc/repo/plot/data.py similarity index 66% rename from dvc/repo/plot.py rename to dvc/repo/plot/data.py index b92eab35a9..22829b2259 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot/data.py @@ -1,6 +1,5 @@ import csv import io -import itertools import json import logging import os @@ -11,60 +10,11 @@ from ruamel import yaml from dvc.exceptions import DvcException, PathMissingError -from dvc.template import Template -from dvc.repo import locked - -logger = logging.getLogger(__name__) - -PAGE_HTML = """ - - dvc plot - - - - - - {divs} - -""" - -DIV_HTML = """
-""" - - -class NoMetricInHistoryError(DvcException): - def __init__(self, path, revisions): - super().__init__( - "Could not find '{}' on any of the revisions " - "'{}'".format(path, ", ".join(revisions)) - ) -class NoMetricOnRevisionError(DvcException): - def __init__(self, path, revision): - self.path = path - self.revision = revision - super().__init__( - "Could not find '{}' on revision " "'{}'".format(path, revision) - ) - - -class TooManyDataSourcesError(DvcException): - def __init__(self, datafile, template_datafiles): - super().__init__( - "Unable to reason which of possible data sources: '{}' " - "should be replaced with '{}'".format( - ", ".join(template_datafiles), datafile - ) - ) - +logger = logging.getLogger(__name__) -class NoDataOrTemplateProvided(DvcException): - def __init__(self): - super().__init__("Datafile or template is not specified.") +WORKSPACE_REVISION_NAME = "workspace" class PlotMetricTypeError(DvcException): @@ -87,7 +37,21 @@ def __init__(self, file): ) -WORKSPACE_REVISION_NAME = "workspace" +class NoMetricOnRevisionError(DvcException): + def __init__(self, path, revision): + self.path = path + self.revision = revision + super().__init__( + "Could not find '{}' on revision " "'{}'".format(path, revision) + ) + + +class NoMetricInHistoryError(DvcException): + def __init__(self, path, revisions): + super().__init__( + "Could not find '{}' on any of the revisions " + "'{}'".format(path, ", ".join(revisions)) + ) def plot_data(filename, revision, content): @@ -283,100 +247,3 @@ def _load_from_revisions(repo, datafile, revisions): "plotted.".format(e.path, e.revision) ) return data - - -def _evaluate_templatepath(repo, template=None): - if not template: - return repo.plot_templates.default_template - - if os.path.exists(template): - return template - return repo.plot_templates.get_template(template) - - -@locked -def fill_template( - repo, datafile, template_path, revisions, fields=None, path=None -): - default_plot = template_path == repo.plot_templates.default_template - - template_datafiles = _parse_template(template_path, datafile) - - template_data = {} - for datafile in template_datafiles: - plot_datas = _load_from_revisions(repo, datafile, revisions) - template_data[datafile] = list( - itertools.chain.from_iterable( - [ - pd.to_datapoints( - fields=fields, default_plot=default_plot, path=path - ) - for pd in plot_datas - ] - ) - ) - - return Template.fill(template_path, template_data, datafile) - - -def plot( - repo, - datafile=None, - template=None, - revisions=None, - fname=None, - fields=None, - path=None, - embed=False, -): - if revisions is None: - revisions = [WORKSPACE_REVISION_NAME] - - if not datafile and not template: - raise NoDataOrTemplateProvided() - - template_path = _evaluate_templatepath(repo, template) - - plot_content = fill_template( - repo, datafile, template_path, revisions, fields, path - ) - - if embed: - div = DIV_HTML.format(id="plot", vega_json=plot_content) - plot_content = PAGE_HTML.format(divs=div) - - if not fname: - fname = _infer_result_file(datafile, template_path, embed) - - with open(fname, "w") as fobj: - fobj.write(plot_content) - return fname - - -def _infer_result_file(datafile, template_path, embed): - if datafile: - tmp = datafile - else: - tmp = "plot" - - if not embed: - extension = os.path.splitext(template_path)[1] - else: - extension = ".html" - - result_file = os.path.splitext(tmp)[0] + extension - - if result_file == datafile or result_file == template_path: - raise DvcException( - "Could not infer plot name, please provide it " "with -f option." - ) - return result_file - - -def _parse_template(template_path, datafile): - template_datafiles = Template.parse_data_placeholders(template_path) - if datafile: - if len(template_datafiles) > 1: - raise TooManyDataSourcesError(datafile, template_datafiles) - template_datafiles = {datafile} - return template_datafiles diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 6144498240..7cfe2171e0 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -9,16 +9,13 @@ from funcy import first from dvc.compat import fspath +from dvc.repo.plot.data import NoMetricInHistoryError, PlotMetricTypeError from dvc.template import ( DefaultLinearTemplate, TemplateNotFound, NoDataForTemplateError, ) -from dvc.repo.plot import ( - NoMetricInHistoryError, - NoDataOrTemplateProvided, - PlotMetricTypeError, -) +from dvc.repo.plot import NoDataOrTemplateProvided def _remove_whitespace(value): diff --git a/tests/unit/test_plot.py b/tests/unit/test_plot.py index c34d4e48fb..3c8c3ce062 100644 --- a/tests/unit/test_plot.py +++ b/tests/unit/test_plot.py @@ -1,8 +1,6 @@ -import json - import pytest -from dvc.repo.plot import _parse_json +from dvc.repo.plot.data import _apply_path @pytest.mark.parametrize( @@ -17,6 +15,6 @@ def test_parse_json(path, expected_result): "some": {"path": [{"a": 1, "b": 2, "c": 3}, {"a": 4, "b": 5, "c": 6}]} } - result = _parse_json(json.dumps(value), path) + result, _ = _apply_path(value, path=path) assert result == expected_result From dcac66acb279f8b3a932c2243842e9dd54051a76 Mon Sep 17 00:00:00 2001 From: pawel Date: Wed, 22 Apr 2020 18:13:28 +0200 Subject: [PATCH 070/102] plot: data loading refactor, support searching for data --- dvc/command/plot.py | 2 +- dvc/repo/plot/data.py | 43 +++++++++++++++++++++++++++++++++-------- tests/unit/test_plot.py | 28 ++++++++++++++++++++++++++- 3 files changed, 63 insertions(+), 10 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 3480cd24bb..ca606fa120 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -157,6 +157,6 @@ def add_parser(subparsers, parent_parser): plot_diff_parser.add_argument( "--filter", default=None, - help="Choose which fileds or path to put into plot.", + help="Choose which filed(s) or jsonpath to put into plot.", ) plot_diff_parser.set_defaults(func=CmdPlotDiff) diff --git a/dvc/repo/plot/data.py b/dvc/repo/plot/data.py index 22829b2259..1d02a9e842 100644 --- a/dvc/repo/plot/data.py +++ b/dvc/repo/plot/data.py @@ -25,8 +25,9 @@ def __init__(self, file): ) -class UnexpectedJsonStructureError(DvcException): - pass +class PlotDataStructureError(DvcException): + def __init__(self): + super().__init__("Plot data extraction failed.") class JsonParsingError(DvcException): @@ -68,6 +69,10 @@ def plot_data(filename, revision, content): def _filter_fields(data_points, fieldnames=None, fields=None, **kwargs): + """ + Try to filter each data_point, preserving only keys specified in + fields. This method assumes that data_points is list of dicts. + """ if not fields: return data_points, fieldnames assert isinstance(fields, set) @@ -102,7 +107,7 @@ def _transform_to_default_data( def _apply_path(data, fieldnames=None, path=None, **kwargs): - if not path: + if not path or not isinstance(data, dict): return data, fieldnames import jsonpath_ng @@ -118,18 +123,42 @@ def _apply_path(data, fieldnames=None, path=None, **kwargs): fieldnames = list(first(data_points).keys()) elif len(first_datum.path.fields) == 1: field_name = first(first_datum.path.fields) + fieldnames = [field_name] data_points = [{field_name: datum.value} for datum in found] else: - raise DvcException("Could not parse data for path '{}'".format(path)) + raise PlotDataStructureError() if not isinstance(data_points, list) or not ( isinstance(first(data_points), dict) ): - raise UnexpectedJsonStructureError("Unable to parse") + raise PlotDataStructureError() return data_points, fieldnames +def _lists(dictionary): + for key, value in dictionary.items(): + if isinstance(value, dict): + yield from (_lists(value)) + elif isinstance(value, list): + yield value + + +def _find_data(data, fieldnames=None, fields=None, **kwargs): + if not fields or not isinstance(data, dict): + return data, fieldnames + + assert isinstance(fields, set) + + for l in _lists(data): + if all([isinstance(dp, dict) for dp in l]): + if set(first(l).keys()) & fields == fields: + if fieldnames: + return l, [f for f in fieldnames if f in fields] + return l, None + raise PlotDataStructureError() + + class PlotData: def __init__(self, filename, revision, content, **kwargs): self.filename = filename @@ -163,7 +192,7 @@ def raw(self): def _processors(self): parent_processors = super(JSONPlotData, self)._processors() - return [_apply_path] + parent_processors + return [_apply_path, _find_data] + parent_processors class CSVPlotData(PlotData): @@ -232,8 +261,6 @@ def _load_from_revisions(repo, datafile, revisions): exceptions.append(e) except PlotMetricTypeError: raise - except UnexpectedJsonStructureError: - raise JsonParsingError(datafile) except Exception: logger.error("Failed to parse '{}' at '{}'.".format(datafile, rev)) raise diff --git a/tests/unit/test_plot.py b/tests/unit/test_plot.py index 3c8c3ce062..6ebba5e469 100644 --- a/tests/unit/test_plot.py +++ b/tests/unit/test_plot.py @@ -1,6 +1,6 @@ import pytest -from dvc.repo.plot.data import _apply_path +from dvc.repo.plot.data import _apply_path, _lists, _find_data @pytest.mark.parametrize( @@ -18,3 +18,29 @@ def test_parse_json(path, expected_result): result, _ = _apply_path(value, path=path) assert result == expected_result + + +@pytest.mark.parametrize( + "dictionary, expected_result", + [ + ({}, []), + ({"x": ["a", "b", "c"]}, [["a", "b", "c"]]), + ( + {"x": {"y": ["a", "b"]}, "z": {"w": ["c", "d"]}}, + [["a", "b"], ["c", "d"]], + ), + ], +) +def test_finding_lists(dictionary, expected_result): + result = _lists(dictionary) + + assert list(result) == expected_result + + +def test_finding_data(): + data = {"a": {"b": [{"x": 2, "y": 3}, {"x": 1, "y": 5}]}} + + result, fieldnames = _find_data(data, None, fields={"x"}) + + assert fieldnames is None + assert result == [{"x": 2, "y": 3}, {"x": 1, "y": 5}] From b5219dfdfe56e918f5f6bab7e7dcda705f4d1752 Mon Sep 17 00:00:00 2001 From: pawel Date: Wed, 22 Apr 2020 18:35:55 +0200 Subject: [PATCH 071/102] plot: raise if wrong fields provided --- dvc/repo/plot/data.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/dvc/repo/plot/data.py b/dvc/repo/plot/data.py index 1d02a9e842..78b6d4ccb1 100644 --- a/dvc/repo/plot/data.py +++ b/dvc/repo/plot/data.py @@ -69,10 +69,6 @@ def plot_data(filename, revision, content): def _filter_fields(data_points, fieldnames=None, fields=None, **kwargs): - """ - Try to filter each data_point, preserving only keys specified in - fields. This method assumes that data_points is list of dicts. - """ if not fields: return data_points, fieldnames assert isinstance(fields, set) @@ -80,7 +76,15 @@ def _filter_fields(data_points, fieldnames=None, fields=None, **kwargs): new_data = [] for data_point in data_points: new_dp = copy(data_point) - to_del = set(data_point.keys()) - fields + + keys = set(data_point.keys()) + if keys & fields != fields: + raise DvcException( + "Could not find some of provided fields: " + "'{}' in '{}'.".format(", ".join(fields), ", ".join(keys)) + ) + + to_del = keys - fields for key in to_del: del new_dp[key] if fieldnames and key in fieldnames: From 9bc6c3f38d159e8a153c4bbaea1f90f6e07e9c8a Mon Sep 17 00:00:00 2001 From: pawel Date: Wed, 22 Apr 2020 18:42:06 +0200 Subject: [PATCH 072/102] plot: command description --- dvc/command/plot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index ca606fa120..9c197e8bbb 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -109,7 +109,7 @@ def add_parser(subparsers, parent_parser): plot_show_parser.add_argument( "--filter", default=None, - help="Choose which fileds or path to put into plot.", + help="Choose which fileds or jsonpath to put into plot.", ) plot_show_parser.set_defaults(func=CmdPlotShow) From 5ef304af8f36320151bfb74bf06de8647590ac81 Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 23 Apr 2020 13:02:35 +0200 Subject: [PATCH 073/102] plot: default: pass y axis info for default plot --- dvc/repo/plot/__init__.py | 17 ++++++++++++++++- dvc/repo/plot/data.py | 4 ++-- tests/func/test_plot.py | 25 +++++++++++++------------ 3 files changed, 31 insertions(+), 15 deletions(-) diff --git a/dvc/repo/plot/__init__.py b/dvc/repo/plot/__init__.py index 8c06804a3e..99eb57efa5 100644 --- a/dvc/repo/plot/__init__.py +++ b/dvc/repo/plot/__init__.py @@ -1,7 +1,9 @@ import itertools +import json import logging import os +from funcy import first from dvc.exceptions import DvcException from dvc.template import Template @@ -76,7 +78,20 @@ def fill_template( ) ) - return Template.fill(template_path, template_data, datafile) + filled_template = Template.fill(template_path, template_data, datafile) + + if default_plot: + assert len(template_data) == 1 + tmp_plot = json.loads(filled_template) + tmp_plot["title"] = first(template_data.keys()) + tmp_plot["encoding"]["y"]["field"] = first( + set(first(first(template_data.values())).keys()) - {"x", "rev"} + ) + filled_template = json.dumps( + tmp_plot, indent=4, separators=(",", ": "), sort_keys=True + ) + + return filled_template def plot( diff --git a/dvc/repo/plot/data.py b/dvc/repo/plot/data.py index 78b6d4ccb1..8d28cece8d 100644 --- a/dvc/repo/plot/data.py +++ b/dvc/repo/plot/data.py @@ -106,8 +106,8 @@ def _transform_to_default_data( y = last(list(first(data_points).keys())) for index, data_point in enumerate(data_points): - new_data.append({"x": index, "y": data_point[y]}) - return new_data, ["x", "y"] + new_data.append({"x": index, y: data_point[y]}) + return new_data, ["x", y] def _apply_path(data, fieldnames=None, path=None, **kwargs): diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 7cfe2171e0..fc917b3f25 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -52,6 +52,7 @@ def _write_json(tmp_dir, metric, filename): def test_plot_csv_one_column(tmp_dir, scm, dvc): + # for single column write with no header, hence `value` in result metric = [{"val": 2}, {"val": 3}] _write_csv(metric, "metric.csv") _run_with_metric(tmp_dir, metric_filename="metric.csv") @@ -60,8 +61,8 @@ def test_plot_csv_one_column(tmp_dir, scm, dvc): plot_json = json.loads((tmp_dir / result).read_text()) assert plot_json["data"]["values"] == [ - {"y": "2", "x": 0, "rev": "workspace"}, - {"y": "3", "x": 1, "rev": "workspace"}, + {"value": "2", "x": 0, "rev": "workspace"}, + {"value": "3", "x": 1, "rev": "workspace"}, ] @@ -76,8 +77,8 @@ def test_plot_csv_multiple_columns(tmp_dir, scm, dvc): result = dvc.plot("metric.csv") plot_json = json.loads((tmp_dir / result).read_text()) assert plot_json["data"]["values"] == [ - {"y": "2", "x": 0, "rev": "workspace"}, - {"y": "3", "x": 1, "rev": "workspace"}, + {"val": "2", "x": 0, "rev": "workspace"}, + {"val": "3", "x": 1, "rev": "workspace"}, ] @@ -90,8 +91,8 @@ def test_plot_json_single_val(tmp_dir, scm, dvc): plot_json = json.loads((tmp_dir / result).read_text()) assert plot_json["data"]["values"] == [ - {"y": 2, "x": 0, "rev": "workspace"}, - {"y": 3, "x": 1, "rev": "workspace"}, + {"val": 2, "x": 0, "rev": "workspace"}, + {"val": 3, "x": 1, "rev": "workspace"}, ] @@ -107,8 +108,8 @@ def test_plot_json_multiple_val(tmp_dir, scm, dvc): plot_json = json.loads((tmp_dir / result).read_text()) assert plot_json["data"]["values"] == [ - {"y": 2, "x": 0, "rev": "workspace"}, - {"y": 3, "x": 1, "rev": "workspace"}, + {"val": 2, "x": 0, "rev": "workspace"}, + {"val": 3, "x": 1, "rev": "workspace"}, ] @@ -362,8 +363,8 @@ def test_plot_default_choose_column(tmp_dir, scm, dvc): plot_json = json.loads((tmp_dir / result).read_text()) assert plot_json["data"]["values"] == [ - {"x": 0, "y": 2, "rev": "workspace"}, - {"x": 1, "y": 3, "rev": "workspace"}, + {"x": 0, "b": 2, "rev": "workspace"}, + {"x": 1, "b": 3, "rev": "workspace"}, ] @@ -377,8 +378,8 @@ def test_plot_embed(tmp_dir, scm, dvc): page_content = BeautifulSoup((tmp_dir / result).read_text()) data_dump = json.dumps( [ - {"y": 2, "x": 0, "rev": "workspace"}, - {"y": 3, "x": 1, "rev": "workspace"}, + {"val": 2, "x": 0, "rev": "workspace"}, + {"val": 3, "x": 1, "rev": "workspace"}, ], sort_keys=True, ) From 39c5d10388cf4cafb1de9f347b915c090bba4526 Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 23 Apr 2020 13:46:01 +0200 Subject: [PATCH 074/102] plot: get rid of fieldnames, expect ordered data --- dvc/repo/plot/data.py | 50 +++++++++++++++++------------------------ tests/unit/test_plot.py | 5 ++--- 2 files changed, 23 insertions(+), 32 deletions(-) diff --git a/dvc/repo/plot/data.py b/dvc/repo/plot/data.py index 8d28cece8d..ba7c17f2b6 100644 --- a/dvc/repo/plot/data.py +++ b/dvc/repo/plot/data.py @@ -68,9 +68,9 @@ def plot_data(filename, revision, content): raise PlotMetricTypeError(filename) -def _filter_fields(data_points, fieldnames=None, fields=None, **kwargs): +def _filter_fields(data_points, fields=None, **kwargs): if not fields: - return data_points, fieldnames + return data_points assert isinstance(fields, set) new_data = [] @@ -87,32 +87,25 @@ def _filter_fields(data_points, fieldnames=None, fields=None, **kwargs): to_del = keys - fields for key in to_del: del new_dp[key] - if fieldnames and key in fieldnames: - fieldnames.remove(key) new_data.append(new_dp) - return new_data, fieldnames + return new_data -def _transform_to_default_data( - data_points, fieldnames=None, default_plot=False, **kwargs -): +def _transform_to_default_data(data_points, default_plot=False, **kwargs): if not default_plot: - return data_points, fieldnames + return data_points new_data = [] - if fieldnames: - y = last(fieldnames) - else: - y = last(list(first(data_points).keys())) + y = last(list(first(data_points).keys())) for index, data_point in enumerate(data_points): new_data.append({"x": index, y: data_point[y]}) - return new_data, ["x", y] + return new_data -def _apply_path(data, fieldnames=None, path=None, **kwargs): +def _apply_path(data, path=None, **kwargs): if not path or not isinstance(data, dict): - return data, fieldnames + return data import jsonpath_ng @@ -124,10 +117,8 @@ def _apply_path(data, fieldnames=None, path=None, **kwargs): and isinstance(first(first_datum.value), dict) ): data_points = first_datum.value - fieldnames = list(first(data_points).keys()) elif len(first_datum.path.fields) == 1: field_name = first(first_datum.path.fields) - fieldnames = [field_name] data_points = [{field_name: datum.value} for datum in found] else: raise PlotDataStructureError() @@ -137,7 +128,7 @@ def _apply_path(data, fieldnames=None, path=None, **kwargs): ): raise PlotDataStructureError() - return data_points, fieldnames + return data_points def _lists(dictionary): @@ -148,18 +139,16 @@ def _lists(dictionary): yield value -def _find_data(data, fieldnames=None, fields=None, **kwargs): +def _find_data(data, fields=None, **kwargs): if not fields or not isinstance(data, dict): - return data, fieldnames + return data assert isinstance(fields, set) for l in _lists(data): if all([isinstance(dp, dict) for dp in l]): if set(first(l).keys()) & fields == fields: - if fieldnames: - return l, [f for f in fieldnames if f in fields] - return l, None + return l raise PlotDataStructureError() @@ -168,7 +157,6 @@ def __init__(self, filename, revision, content, **kwargs): self.filename = filename self.revision = revision self.content = content - self.fieldnames = None @property def raw(self): @@ -179,10 +167,9 @@ def _processors(self): def to_datapoints(self, **kwargs): data = self.raw - fieldnames = self.fieldnames for data_proc in self._processors(): - data, fieldnames = data_proc(data, fieldnames, **kwargs) + data = data_proc(data, **kwargs) for data_point in data: data_point["rev"] = self.revision @@ -221,8 +208,13 @@ def raw(self): delimiter=self.delimiter, ) - self.fieldnames = reader.fieldnames - return [row for row in reader] + fieldnames = reader.fieldnames + data = [row for row in reader] + + return [ + OrderedDict([(field, data_point[field]) for field in fieldnames]) + for data_point in data + ] class YAMLPLotData(PlotData): diff --git a/tests/unit/test_plot.py b/tests/unit/test_plot.py index 6ebba5e469..3d756df779 100644 --- a/tests/unit/test_plot.py +++ b/tests/unit/test_plot.py @@ -15,7 +15,7 @@ def test_parse_json(path, expected_result): "some": {"path": [{"a": 1, "b": 2, "c": 3}, {"a": 4, "b": 5, "c": 6}]} } - result, _ = _apply_path(value, path=path) + result = _apply_path(value, path=path) assert result == expected_result @@ -40,7 +40,6 @@ def test_finding_lists(dictionary, expected_result): def test_finding_data(): data = {"a": {"b": [{"x": 2, "y": 3}, {"x": 1, "y": 5}]}} - result, fieldnames = _find_data(data, None, fields={"x"}) + result = _find_data(data, fields={"x"}) - assert fieldnames is None assert result == [{"x": 2, "y": 3}, {"x": 1, "y": 5}] From aed06346621f198bfa126e57394c0a12760b67f7 Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 23 Apr 2020 15:56:19 +0200 Subject: [PATCH 075/102] plot: handle default plot in separate method --- dvc/repo/plot/__init__.py | 55 +++++++++++++++++++++++++++------------ dvc/repo/plot/data.py | 20 ++++---------- 2 files changed, 44 insertions(+), 31 deletions(-) diff --git a/dvc/repo/plot/__init__.py b/dvc/repo/plot/__init__.py index 99eb57efa5..cb31de5c2c 100644 --- a/dvc/repo/plot/__init__.py +++ b/dvc/repo/plot/__init__.py @@ -3,10 +3,11 @@ import logging import os -from funcy import first +from funcy import first, last from dvc.exceptions import DvcException -from dvc.template import Template +from dvc.repo.plot.data import PlotData +from dvc.template import Template, NoDataForTemplateError from dvc.repo import locked logger = logging.getLogger(__name__) @@ -70,28 +71,50 @@ def fill_template( template_data[datafile] = list( itertools.chain.from_iterable( [ - pd.to_datapoints( - fields=fields, default_plot=default_plot, path=path - ) + pd.to_datapoints(fields=fields, path=path) for pd in plot_datas ] ) ) - filled_template = Template.fill(template_path, template_data, datafile) - if default_plot: - assert len(template_data) == 1 - tmp_plot = json.loads(filled_template) - tmp_plot["title"] = first(template_data.keys()) - tmp_plot["encoding"]["y"]["field"] = first( - set(first(first(template_data.values())).keys()) - {"x", "rev"} - ) - filled_template = json.dumps( - tmp_plot, indent=4, separators=(",", ": "), sort_keys=True + return _fill_default_template(repo, template_data) + + return Template.fill(template_path, template_data, datafile) + + +def _fill_default_template(repo, template_data): + if len(template_data) == 0: + raise NoDataForTemplateError(repo.plot_templates.default_template) + assert (len(template_data)) == 1 + datafile, data = first(template_data.items()) + + ordered_keys = list(first(data).keys()) + ordered_keys.remove(PlotData.REVISION_FIELD) + data_field = last(ordered_keys) + + new_data = [] + for index, data_point in enumerate(data): + new_data.append( + { + "x": index, + data_field: data_point[data_field], + PlotData.REVISION_FIELD: data_point[PlotData.REVISION_FIELD], + } ) - return filled_template + tmp_plot = json.loads( + Template.fill( + repo.plot_templates.default_template, + {datafile: new_data}, + datafile, + ) + ) + tmp_plot["title"] = datafile + tmp_plot["encoding"]["y"]["field"] = data_field + return json.dumps( + tmp_plot, indent=4, separators=(",", ": "), sort_keys=True + ) def plot( diff --git a/dvc/repo/plot/data.py b/dvc/repo/plot/data.py index ba7c17f2b6..c9a11a79de 100644 --- a/dvc/repo/plot/data.py +++ b/dvc/repo/plot/data.py @@ -6,7 +6,7 @@ from collections import OrderedDict from copy import copy -from funcy import first, last, cached_property +from funcy import first, cached_property from ruamel import yaml from dvc.exceptions import DvcException, PathMissingError @@ -91,18 +91,6 @@ def _filter_fields(data_points, fields=None, **kwargs): return new_data -def _transform_to_default_data(data_points, default_plot=False, **kwargs): - if not default_plot: - return data_points - - new_data = [] - y = last(list(first(data_points).keys())) - - for index, data_point in enumerate(data_points): - new_data.append({"x": index, y: data_point[y]}) - return new_data - - def _apply_path(data, path=None, **kwargs): if not path or not isinstance(data, dict): return data @@ -153,6 +141,8 @@ def _find_data(data, fields=None, **kwargs): class PlotData: + REVISION_FIELD = "rev" + def __init__(self, filename, revision, content, **kwargs): self.filename = filename self.revision = revision @@ -163,7 +153,7 @@ def raw(self): raise NotImplementedError def _processors(self): - return [_filter_fields, _transform_to_default_data] + return [_filter_fields] def to_datapoints(self, **kwargs): data = self.raw @@ -172,7 +162,7 @@ def to_datapoints(self, **kwargs): data = data_proc(data, **kwargs) for data_point in data: - data_point["rev"] = self.revision + data_point[self.REVISION_FIELD] = self.revision return data From 82638204b78a85140cf5aa01122a60800bb46a18 Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 23 Apr 2020 16:43:11 +0200 Subject: [PATCH 076/102] plot: fix default --- dvc/repo/plot/__init__.py | 60 +++++++++++++++++++++------------------ dvc/template.py | 3 -- 2 files changed, 32 insertions(+), 31 deletions(-) diff --git a/dvc/repo/plot/__init__.py b/dvc/repo/plot/__init__.py index cb31de5c2c..22db347b2b 100644 --- a/dvc/repo/plot/__init__.py +++ b/dvc/repo/plot/__init__.py @@ -1,4 +1,3 @@ -import itertools import json import logging import os @@ -68,33 +67,33 @@ def fill_template( from dvc.repo.plot.data import _load_from_revisions plot_datas = _load_from_revisions(repo, datafile, revisions) - template_data[datafile] = list( - itertools.chain.from_iterable( - [ - pd.to_datapoints(fields=fields, path=path) - for pd in plot_datas - ] - ) - ) - - if default_plot: - return _fill_default_template(repo, template_data) - return Template.fill(template_path, template_data, datafile) + tmp_data = [] + for pd in plot_datas: + rev_data_points = pd.to_datapoints(fields=fields, path=path) + if default_plot: + rev_data_points = _to_default_data(rev_data_points) + tmp_data.extend(rev_data_points) + template_data[datafile] = tmp_data -def _fill_default_template(repo, template_data): if len(template_data) == 0: - raise NoDataForTemplateError(repo.plot_templates.default_template) - assert (len(template_data)) == 1 - datafile, data = first(template_data.items()) + raise NoDataForTemplateError(template_path) + + filled_template = Template.fill(template_path, template_data, datafile) + + if default_plot: + return _fix_default_template(template_data, filled_template) - ordered_keys = list(first(data).keys()) - ordered_keys.remove(PlotData.REVISION_FIELD) - data_field = last(ordered_keys) + return filled_template + +def _to_default_data(data_points): + keys = list(first(data_points).keys()) + keys.remove(PlotData.REVISION_FIELD) + data_field = last(keys) new_data = [] - for index, data_point in enumerate(data): + for index, data_point in enumerate(data_points): new_data.append( { "x": index, @@ -102,14 +101,19 @@ def _fill_default_template(repo, template_data): PlotData.REVISION_FIELD: data_point[PlotData.REVISION_FIELD], } ) + return new_data - tmp_plot = json.loads( - Template.fill( - repo.plot_templates.default_template, - {datafile: new_data}, - datafile, - ) - ) + +def _fix_default_template(template_data, plot_json): + assert len(template_data) == 1 + datafile, data = first(template_data.items()) + + keys = list(first(data).keys()) + keys.remove(PlotData.REVISION_FIELD) + keys.remove("x") + data_field = first(keys) + + tmp_plot = json.loads(plot_json) tmp_plot["title"] = datafile tmp_plot["encoding"]["y"]["field"] = data_field return json.dumps( diff --git a/dvc/template.py b/dvc/template.py index d5a35ad46a..ad4c4b54d8 100644 --- a/dvc/template.py +++ b/dvc/template.py @@ -103,9 +103,6 @@ def fill(template_path, data, priority_datafile=None): else: key = file - if key not in data: - raise NoDataForTemplateError(template_path) - result_content = result_content.replace( placeholder, json.dumps( From 8a6d9e4721b0d8c09c9d8cff3021a0b69f1de760 Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 23 Apr 2020 16:59:28 +0200 Subject: [PATCH 077/102] plot: command option names fixes --- dvc/command/plot.py | 28 +++++++++++++++------------- tests/unit/command/test_plot.py | 8 ++++---- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 9c197e8bbb..82b6931685 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -17,27 +17,27 @@ def _revisions(self): def run(self): fields = None - path = None - if self.args.filter: - if self.args.filter.startswith("$"): - path = self.args.filter + jsonpath = None + if self.args.fields: + if self.args.fields.startswith("$"): + jsonpath = self.args.fields else: - fields = set(self.args.filter.split(",")) + fields = set(self.args.fields.split(",")) try: - result = self.repo.plot( + result_path = self.repo.plot( datafile=self.args.datafile, template=self.args.template, revisions=self._revisions(), - fname=self.args.file, + fname=self.args.result, fields=fields, - path=path, + path=jsonpath, embed=not self.args.show_json, ) except DvcException: logger.exception("") return 1 logger.info( - "file://{}".format(os.path.join(self.repo.root_dir, result)) + "file://{}".format(os.path.join(self.repo.root_dir, result_path)) ) return 0 @@ -98,7 +98,7 @@ def add_parser(subparsers, parent_parser): "datafile", nargs="?", default=None, help="Data to be visualized." ) plot_show_parser.add_argument( - "-f", "--file", help="Name of the generated file." + "-r", "--result", help="Name of the generated file." ) plot_show_parser.add_argument( "--show-json", @@ -107,7 +107,8 @@ def add_parser(subparsers, parent_parser): help="Do not wrap vega plot json with HTML.", ) plot_show_parser.add_argument( - "--filter", + "-f", + "--fields", default=None, help="Choose which fileds or jsonpath to put into plot.", ) @@ -139,7 +140,7 @@ def add_parser(subparsers, parent_parser): help="Data to be visualized.", ) plot_diff_parser.add_argument( - "-f", "--file", help="Name of the generated file." + "-r", "--result", help="Name of the generated file." ) plot_diff_parser.add_argument( "revisions", @@ -155,7 +156,8 @@ def add_parser(subparsers, parent_parser): help="Do not wrap vega plot json with HTML.", ) plot_diff_parser.add_argument( - "--filter", + "-f", + "--fields", default=None, help="Choose which filed(s) or jsonpath to put into plot.", ) diff --git a/tests/unit/command/test_plot.py b/tests/unit/command/test_plot.py index cf3411f205..4ca25b079a 100644 --- a/tests/unit/command/test_plot.py +++ b/tests/unit/command/test_plot.py @@ -9,13 +9,13 @@ def test_metrics_diff(mocker): [ "plot", "diff", - "-f", + "-r", "result.extension", "-t", "template", "-d", "datafile", - "--filter", + "--field", "column1,column2", "--show-json", "HEAD", @@ -48,11 +48,11 @@ def test_metrics_show(mocker): [ "plot", "show", - "-f", + "-r", "result.extension", "-t", "template", - "--filter", + "-f", "$.data", "--show-json", "datafile", From 9b947e0a485ea18d051059791d468f6565ed79f2 Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 23 Apr 2020 17:08:21 +0200 Subject: [PATCH 078/102] refactoring --- dvc/repo/plot/__init__.py | 3 ++- dvc/repo/plot/data.py | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dvc/repo/plot/__init__.py b/dvc/repo/plot/__init__.py index 22db347b2b..7d7512dd38 100644 --- a/dvc/repo/plot/__init__.py +++ b/dvc/repo/plot/__init__.py @@ -172,7 +172,8 @@ def _infer_result_file(datafile, template_path, embed): if result_file == datafile or result_file == template_path: raise DvcException( - "Could not infer plot name, please provide it " "with -f option." + "Cannot create '{}': file already exists, use -r to redefine " + "it".format(result_file) ) return result_file diff --git a/dvc/repo/plot/data.py b/dvc/repo/plot/data.py index c9a11a79de..6596a5412c 100644 --- a/dvc/repo/plot/data.py +++ b/dvc/repo/plot/data.py @@ -80,8 +80,7 @@ def _filter_fields(data_points, fields=None, **kwargs): keys = set(data_point.keys()) if keys & fields != fields: raise DvcException( - "Could not find some of provided fields: " - "'{}' in '{}'.".format(", ".join(fields), ", ".join(keys)) + "Could not find fields: '{}'.".format(", ".join(fields)) ) to_del = keys - fields From a6b2faf7e5269bfd14bddf7f2ae29913b6305722 Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 23 Apr 2020 18:17:03 +0200 Subject: [PATCH 079/102] fixes --- dvc/repo/plot/data.py | 16 +++++++++++----- tests/func/test_plot.py | 31 +++++++++++++++++++++++++++++++ tests/unit/test_plot.py | 5 +++-- 3 files changed, 45 insertions(+), 7 deletions(-) diff --git a/dvc/repo/plot/data.py b/dvc/repo/plot/data.py index 6596a5412c..3b3b3427bf 100644 --- a/dvc/repo/plot/data.py +++ b/dvc/repo/plot/data.py @@ -68,7 +68,7 @@ def plot_data(filename, revision, content): raise PlotMetricTypeError(filename) -def _filter_fields(data_points, fields=None, **kwargs): +def _filter_fields(data_points, filename, revision, fields=None, **kwargs): if not fields: return data_points assert isinstance(fields, set) @@ -80,7 +80,9 @@ def _filter_fields(data_points, fields=None, **kwargs): keys = set(data_point.keys()) if keys & fields != fields: raise DvcException( - "Could not find fields: '{}'.".format(", ".join(fields)) + "Could not find fields: '{}' for '{}' at '{}'.".format( + ", " "".join(fields), filename, revision + ) ) to_del = keys - fields @@ -127,10 +129,12 @@ def _lists(dictionary): def _find_data(data, fields=None, **kwargs): - if not fields or not isinstance(data, dict): + if not isinstance(data, dict): return data - assert isinstance(fields, set) + if not fields: + # just look for first list of dicts + fields = set() for l in _lists(data): if all([isinstance(dp, dict) for dp in l]): @@ -158,7 +162,9 @@ def to_datapoints(self, **kwargs): data = self.raw for data_proc in self._processors(): - data = data_proc(data, **kwargs) + data = data_proc( + data, filename=self.filename, revision=self.revision, **kwargs + ) for data_point in data: data_point[self.REVISION_FIELD] = self.revision diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index fc917b3f25..95d7bbc3ab 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -132,6 +132,37 @@ def test_plot_confusion(tmp_dir, dvc): ] +def test_plot_multiple_revs_default(tmp_dir, scm, dvc): + metric_1 = [{"x": 1, "y": 2}, {"x": 2, "y": 3}] + _write_json(tmp_dir, metric_1, "metric.json") + _run_with_metric(tmp_dir, "metric.json", "init", "v1") + + metric_2 = [{"x": 1, "y": 3}, {"x": 2, "y": 5}] + _write_json(tmp_dir, metric_2, "metric.json") + _run_with_metric(tmp_dir, "metric.json", "second", "v2") + + metric_3 = [{"x": 1, "y": 5}, {"x": 2, "y": 6}] + _write_json(tmp_dir, metric_3, "metric.json") + _run_with_metric(tmp_dir, "metric.json", "third") + + result = dvc.plot( + "metric.json", + fields={"y"}, + revisions=["HEAD", "v2", "v1"], + fname="result.json", + ) + + plot_json = json.loads((tmp_dir / result).read_text()) + assert plot_json["data"]["values"] == [ + {"y": 5, "x": 0, "rev": "HEAD"}, + {"y": 6, "x": 1, "rev": "HEAD"}, + {"y": 3, "x": 0, "rev": "v2"}, + {"y": 5, "x": 1, "rev": "v2"}, + {"y": 2, "x": 0, "rev": "v1"}, + {"y": 3, "x": 1, "rev": "v1"}, + ] + + def test_plot_multiple_revs(tmp_dir, scm, dvc): shutil.copy( fspath(tmp_dir / ".dvc" / "plot" / "default.json"), "template.json" diff --git a/tests/unit/test_plot.py b/tests/unit/test_plot.py index 3d756df779..fcf2ce30b1 100644 --- a/tests/unit/test_plot.py +++ b/tests/unit/test_plot.py @@ -37,9 +37,10 @@ def test_finding_lists(dictionary, expected_result): assert list(result) == expected_result -def test_finding_data(): +@pytest.mark.parametrize("fields", [{"x"}, set()]) +def test_finding_data(fields): data = {"a": {"b": [{"x": 2, "y": 3}, {"x": 1, "y": 5}]}} - result = _find_data(data, fields={"x"}) + result = _find_data(data, fields=fields) assert result == [{"x": 2, "y": 3}, {"x": 1, "y": 5}] From 8d12dd4338597dacdee5a1073d036669d92b44dd Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 24 Apr 2020 15:19:58 +0200 Subject: [PATCH 080/102] plot: provide option for stdout redirection --- dvc/command/plot.py | 61 +++++++++++++++-- dvc/repo/plot/__init__.py | 10 +-- tests/func/test_plot.py | 112 ++++++++++++++------------------ tests/unit/command/test_plot.py | 6 +- 4 files changed, 108 insertions(+), 81 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 82b6931685..adfc635278 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -14,8 +14,34 @@ class CmdPLot(CmdBase): def _revisions(self): raise NotImplementedError - def run(self): + def _result_file(self): + if self.args.result: + return self.args.result + + extension = self._result_extension() + base = self._result_basename() + + result_file = base + extension + if os.path.exists(result_file): + raise DvcException( + "Cannot create '{}': file already exists, use -r to redefine " + "it".format(result_file) + ) + return result_file + + def _result_basename(self): + if self.args.datafile: + return os.path.splitext(self.args.datafile)[0] + return "plot" + def _result_extension(self): + if not self.args.show_json: + return ".html" + elif self.args.template: + return os.path.splitext(self.args.template)[-1] + return ".json" + + def run(self): fields = None jsonpath = None if self.args.fields: @@ -24,21 +50,32 @@ def run(self): else: fields = set(self.args.fields.split(",")) try: - result_path = self.repo.plot( + plot_string = self.repo.plot( datafile=self.args.datafile, template=self.args.template, revisions=self._revisions(), - fname=self.args.result, fields=fields, path=jsonpath, embed=not self.args.show_json, ) + + if self.args.stdout: + logger.info(plot_string) + else: + result_path = self._result_file() + with open(result_path, "w") as fobj: + fobj.write(plot_string) + + logger.info( + "file://{}".format( + os.path.join(self.repo.root_dir, result_path) + ) + ) + except DvcException: logger.exception("") return 1 - logger.info( - "file://{}".format(os.path.join(self.repo.root_dir, result_path)) - ) + return 0 @@ -112,6 +149,12 @@ def add_parser(subparsers, parent_parser): default=None, help="Choose which fileds or jsonpath to put into plot.", ) + plot_show_parser.add_argument( + "--stdout", + action="store_true", + default=False, + help="Print plot content to stdout", + ) plot_show_parser.set_defaults(func=CmdPlotShow) PLOT_DIFF_HELP = ( @@ -161,4 +204,10 @@ def add_parser(subparsers, parent_parser): default=None, help="Choose which filed(s) or jsonpath to put into plot.", ) + plot_diff_parser.add_argument( + "--stdout", + action="store_true", + default=False, + help="Print plot content to stdout", + ) plot_diff_parser.set_defaults(func=CmdPlotDiff) diff --git a/dvc/repo/plot/__init__.py b/dvc/repo/plot/__init__.py index 7d7512dd38..5085338d38 100644 --- a/dvc/repo/plot/__init__.py +++ b/dvc/repo/plot/__init__.py @@ -126,7 +126,6 @@ def plot( datafile=None, template=None, revisions=None, - fname=None, fields=None, path=None, embed=False, @@ -149,15 +148,10 @@ def plot( div = DIV_HTML.format(id="plot", vega_json=plot_content) plot_content = PAGE_HTML.format(divs=div) - if not fname: - fname = _infer_result_file(datafile, template_path, embed) + return plot_content - with open(fname, "w") as fobj: - fobj.write(plot_content) - return fname - -def _infer_result_file(datafile, template_path, embed): +def infer_result_file(datafile, template_path, embed): if datafile: tmp = datafile else: diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 95d7bbc3ab..cbc1d4126f 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -57,10 +57,10 @@ def test_plot_csv_one_column(tmp_dir, scm, dvc): _write_csv(metric, "metric.csv") _run_with_metric(tmp_dir, metric_filename="metric.csv") - result = dvc.plot("metric.csv") + plot_string = dvc.plot("metric.csv") - plot_json = json.loads((tmp_dir / result).read_text()) - assert plot_json["data"]["values"] == [ + plot_content = json.loads(plot_string) + assert plot_content["data"]["values"] == [ {"value": "2", "x": 0, "rev": "workspace"}, {"value": "3", "x": 1, "rev": "workspace"}, ] @@ -74,9 +74,10 @@ def test_plot_csv_multiple_columns(tmp_dir, scm, dvc): _write_csv(metric, "metric.csv") _run_with_metric(tmp_dir, metric_filename="metric.csv") - result = dvc.plot("metric.csv") - plot_json = json.loads((tmp_dir / result).read_text()) - assert plot_json["data"]["values"] == [ + plot_string = dvc.plot("metric.csv") + + plot_content = json.loads(plot_string) + assert plot_content["data"]["values"] == [ {"val": "2", "x": 0, "rev": "workspace"}, {"val": "3", "x": 1, "rev": "workspace"}, ] @@ -87,9 +88,9 @@ def test_plot_json_single_val(tmp_dir, scm, dvc): _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "first run") - result = dvc.plot("metric.json", fname="result.json") + plot_string = dvc.plot("metric.json") - plot_json = json.loads((tmp_dir / result).read_text()) + plot_json = json.loads(plot_string) assert plot_json["data"]["values"] == [ {"val": 2, "x": 0, "rev": "workspace"}, {"val": 3, "x": 1, "rev": "workspace"}, @@ -104,10 +105,10 @@ def test_plot_json_multiple_val(tmp_dir, scm, dvc): _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "first run") - result = dvc.plot("metric.json", fname="result.json") + plot_string = dvc.plot("metric.json") - plot_json = json.loads((tmp_dir / result).read_text()) - assert plot_json["data"]["values"] == [ + plot_content = json.loads(plot_string) + assert plot_content["data"]["values"] == [ {"val": 2, "x": 0, "rev": "workspace"}, {"val": 3, "x": 1, "rev": "workspace"}, ] @@ -121,12 +122,10 @@ def test_plot_confusion(tmp_dir, dvc): _write_json(tmp_dir, confusion_matrix, "metric.json") _run_with_metric(tmp_dir, "metric.json", "first run") - result = dvc.plot( - datafile="metric.json", template="confusion", fname="result.json" - ) + plot_string = dvc.plot(datafile="metric.json", template="confusion") - plot_json = json.loads((tmp_dir / result).read_text()) - assert plot_json["data"]["values"] == [ + plot_content = json.loads(plot_string) + assert plot_content["data"]["values"] == [ {"predicted": "B", "actual": "A", "rev": "workspace"}, {"predicted": "A", "actual": "A", "rev": "workspace"}, ] @@ -145,15 +144,12 @@ def test_plot_multiple_revs_default(tmp_dir, scm, dvc): _write_json(tmp_dir, metric_3, "metric.json") _run_with_metric(tmp_dir, "metric.json", "third") - result = dvc.plot( - "metric.json", - fields={"y"}, - revisions=["HEAD", "v2", "v1"], - fname="result.json", + plot_string = dvc.plot( + "metric.json", fields={"y"}, revisions=["HEAD", "v2", "v1"], ) - plot_json = json.loads((tmp_dir / result).read_text()) - assert plot_json["data"]["values"] == [ + plot_content = json.loads(plot_string) + assert plot_content["data"]["values"] == [ {"y": 5, "x": 0, "rev": "HEAD"}, {"y": 6, "x": 1, "rev": "HEAD"}, {"y": 3, "x": 0, "rev": "v2"}, @@ -180,15 +176,14 @@ def test_plot_multiple_revs(tmp_dir, scm, dvc): _write_json(tmp_dir, metric_3, "metric.json") _run_with_metric(tmp_dir, "metric.json", "third") - result = dvc.plot( + plot_string = dvc.plot( "metric.json", template="template.json", revisions=["HEAD", "v2", "v1"], - fname="result.json", ) - plot_json = json.loads((tmp_dir / result).read_text()) - assert plot_json["data"]["values"] == [ + plot_content = json.loads(plot_string) + assert plot_content["data"]["values"] == [ {"y": 5, "x": 1, "rev": "HEAD"}, {"y": 6, "x": 2, "rev": "HEAD"}, {"y": 3, "x": 1, "rev": "v2"}, @@ -208,16 +203,14 @@ def test_plot_even_if_metric_missing(tmp_dir, scm, dvc, caplog): caplog.clear() with caplog.at_level(logging.WARNING, "dvc"): - result = dvc.plot( - "metric.json", revisions=["v1", "v2"], fname="result.json" - ) + plot_string = dvc.plot("metric.json", revisions=["v1", "v2"]) assert ( "File 'metric.json' was not found at: 'v1'. " "It will not be plotted." in caplog.text ) - plot_json = json.loads((tmp_dir / result).read_text()) - assert plot_json["data"]["values"] == [ + plot_content = json.loads(plot_string) + assert plot_content["data"]["values"] == [ {"y": 2, "x": 0, "rev": "v2"}, {"y": 3, "x": 1, "rev": "v2"}, ] @@ -259,12 +252,10 @@ def test_custom_template(tmp_dir, scm, dvc, custom_template): _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "init", "v1") - result = dvc.plot( - "metric.json", fspath(custom_template), fname="result.json" - ) + plot_string = dvc.plot("metric.json", fspath(custom_template)) - plot_json = json.loads((tmp_dir / result).read_text()) - assert plot_json["data"]["values"] == [ + plot_content = json.loads(plot_string) + assert plot_content["data"]["values"] == [ {"a": 1, "b": 2, "rev": "workspace"}, {"a": 2, "b": 3, "rev": "workspace"}, ] @@ -285,12 +276,10 @@ def test_custom_template_with_specified_data( _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "init", "v1") - result = dvc.plot( - datafile=None, template=fspath(custom_template), fname="result.json" - ) + plot_string = dvc.plot(datafile=None, template=fspath(custom_template)) - plot_json = json.loads((tmp_dir / result).read_text()) - assert plot_json["data"]["values"] == [ + plot_content = json.loads(plot_string) + assert plot_content["data"]["values"] == [ {"a": 1, "b": 2, "rev": "workspace"}, {"a": 2, "b": 3, "rev": "workspace"}, ] @@ -311,14 +300,12 @@ def test_plot_override_specified_data_source(tmp_dir, scm, dvc): _write_json(tmp_dir, metric, "metric2.json") _run_with_metric(tmp_dir, "metric2.json", "init", "v1") - result = dvc.plot( - datafile="metric2.json", - template="newtemplate.json", - fname="result.json", + plot_string = dvc.plot( + datafile="metric2.json", template="newtemplate.json", ) - plot_json = json.loads((tmp_dir / result).read_text()) - assert plot_json["data"]["values"] == [ + plot_content = json.loads(plot_string) + assert plot_content["data"]["values"] == [ {"a": 1, "b": 2, "rev": "workspace"}, {"a": 2, "b": 3, "rev": "workspace"}, ] @@ -334,15 +321,13 @@ def test_should_embed_vega_json_template(tmp_dir, scm, dvc): _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "init", "v1") - result = dvc.plot( - "metric.json", "template.json", fname="result.json", embed=False - ) + plot_string = dvc.plot("metric.json", "template.json", embed=False) - plot_json = json.loads((tmp_dir / result).read_text()) + plot_content = json.loads(plot_string) assert [ {"x": 1, "y": 2, "rev": "workspace"}, {"x": 2, "y": 3, "rev": "workspace"}, - ] == plot_json["data"]["values"] + ] == plot_content["data"]["values"] def test_should_raise_on_no_template_and_datafile(tmp_dir, dvc): @@ -371,15 +356,12 @@ def test_plot_choose_columns(tmp_dir, scm, dvc, custom_template): _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "init", "v1") - result = dvc.plot( - "metric.json", - fspath(custom_template), - fields={"b", "c"}, - fname="result.json", + plot_string = dvc.plot( + "metric.json", fspath(custom_template), fields={"b", "c"}, ) - plot_json = json.loads((tmp_dir / result).read_text()) - assert plot_json["data"]["values"] == [ + plot_content = json.loads(plot_string) + assert plot_content["data"]["values"] == [ {"b": 2, "c": 3, "rev": "workspace"}, {"b": 3, "c": 4, "rev": "workspace"}, ] @@ -390,10 +372,10 @@ def test_plot_default_choose_column(tmp_dir, scm, dvc): _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "init", "v1") - result = dvc.plot("metric.json", fields={"b"}, fname="result.json") + plot_string = dvc.plot("metric.json", fields={"b"}) - plot_json = json.loads((tmp_dir / result).read_text()) - assert plot_json["data"]["values"] == [ + plot_content = json.loads(plot_string) + assert plot_content["data"]["values"] == [ {"x": 0, "b": 2, "rev": "workspace"}, {"x": 1, "b": 3, "rev": "workspace"}, ] @@ -404,9 +386,9 @@ def test_plot_embed(tmp_dir, scm, dvc): _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "first run") - result = dvc.plot("metric.json", fname="result.json", embed=True) + plot_string = dvc.plot("metric.json", embed=True) - page_content = BeautifulSoup((tmp_dir / result).read_text()) + page_content = BeautifulSoup(plot_string) data_dump = json.dumps( [ {"val": 2, "x": 0, "rev": "workspace"}, diff --git a/tests/unit/command/test_plot.py b/tests/unit/command/test_plot.py index 4ca25b079a..52587b9a38 100644 --- a/tests/unit/command/test_plot.py +++ b/tests/unit/command/test_plot.py @@ -18,6 +18,7 @@ def test_metrics_diff(mocker): "--field", "column1,column2", "--show-json", + "--stdout", "HEAD", "tag1", "tag2", @@ -28,6 +29,7 @@ def test_metrics_diff(mocker): cmd = cli_args.func(cli_args) m = mocker.patch.object(cmd.repo, "plot", autospec=True) + mocker.patch("builtins.open") mocker.patch("os.path.join") assert cmd.run() == 0 @@ -36,7 +38,6 @@ def test_metrics_diff(mocker): datafile="datafile", template="template", revisions=["HEAD", "tag1", "tag2"], - fname="result.extension", fields={"column1", "column2"}, path=None, embed=False, @@ -56,6 +57,7 @@ def test_metrics_show(mocker): "$.data", "--show-json", "datafile", + "--stdout", ] ) assert cli_args.func == CmdPlotShow @@ -63,6 +65,7 @@ def test_metrics_show(mocker): cmd = cli_args.func(cli_args) m = mocker.patch.object(cmd.repo, "plot", autospec=True) + mocker.patch("builtins.open") mocker.patch("os.path.join") assert cmd.run() == 0 @@ -70,7 +73,6 @@ def test_metrics_show(mocker): m.assert_called_once_with( datafile="datafile", template="template", - fname="result.extension", revisions=None, fields=None, path="$.data", From d9b42fb6f6200f84f7fab1abc5c852c654c167a4 Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 24 Apr 2020 15:32:53 +0200 Subject: [PATCH 081/102] plot: rename show-json to no-html --- dvc/command/plot.py | 10 ++++++---- tests/unit/command/test_plot.py | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index adfc635278..08455663e4 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -35,7 +35,7 @@ def _result_basename(self): return "plot" def _result_extension(self): - if not self.args.show_json: + if not self.args.no_html: return ".html" elif self.args.template: return os.path.splitext(self.args.template)[-1] @@ -56,7 +56,7 @@ def run(self): revisions=self._revisions(), fields=fields, path=jsonpath, - embed=not self.args.show_json, + embed=not self.args.no_html, ) if self.args.stdout: @@ -138,7 +138,7 @@ def add_parser(subparsers, parent_parser): "-r", "--result", help="Name of the generated file." ) plot_show_parser.add_argument( - "--show-json", + "--no-html", action="store_true", default=False, help="Do not wrap vega plot json with HTML.", @@ -150,6 +150,7 @@ def add_parser(subparsers, parent_parser): help="Choose which fileds or jsonpath to put into plot.", ) plot_show_parser.add_argument( + "-o", "--stdout", action="store_true", default=False, @@ -193,7 +194,7 @@ def add_parser(subparsers, parent_parser): ) plot_diff_parser.add_argument( - "--show-json", + "--no-html", action="store_true", default=False, help="Do not wrap vega plot json with HTML.", @@ -205,6 +206,7 @@ def add_parser(subparsers, parent_parser): help="Choose which filed(s) or jsonpath to put into plot.", ) plot_diff_parser.add_argument( + "-o", "--stdout", action="store_true", default=False, diff --git a/tests/unit/command/test_plot.py b/tests/unit/command/test_plot.py index 52587b9a38..2482560434 100644 --- a/tests/unit/command/test_plot.py +++ b/tests/unit/command/test_plot.py @@ -17,7 +17,7 @@ def test_metrics_diff(mocker): "datafile", "--field", "column1,column2", - "--show-json", + "--no-html", "--stdout", "HEAD", "tag1", @@ -55,7 +55,7 @@ def test_metrics_show(mocker): "template", "-f", "$.data", - "--show-json", + "--no-html", "datafile", "--stdout", ] From 6a9a3f3793f4e99335ba1a74c36f7bc727587edf Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 24 Apr 2020 16:04:04 +0200 Subject: [PATCH 082/102] plot: add no-csv-header option --- dvc/command/plot.py | 13 +++++++++++++ dvc/repo/plot/__init__.py | 15 ++++++++++++--- dvc/repo/plot/data.py | 24 +++++++++--------------- tests/func/test_plot.py | 6 +++--- tests/unit/command/test_plot.py | 5 ++++- 5 files changed, 41 insertions(+), 22 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 08455663e4..1b02f3cc6f 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -57,6 +57,7 @@ def run(self): fields=fields, path=jsonpath, embed=not self.args.no_html, + csv_header=not self.args.no_csv_header, ) if self.args.stdout: @@ -156,6 +157,12 @@ def add_parser(subparsers, parent_parser): default=False, help="Print plot content to stdout", ) + plot_show_parser.add_argument( + "--no-csv-header", + action="store_true", + default=False, + help="Provided csv datafile does not have a header.", + ) plot_show_parser.set_defaults(func=CmdPlotShow) PLOT_DIFF_HELP = ( @@ -212,4 +219,10 @@ def add_parser(subparsers, parent_parser): default=False, help="Print plot content to stdout", ) + plot_diff_parser.add_argument( + "--no-csv-header", + action="store_true", + default=False, + help="Provided csv datafile does not have a header.", + ) plot_diff_parser.set_defaults(func=CmdPlotDiff) diff --git a/dvc/repo/plot/__init__.py b/dvc/repo/plot/__init__.py index 5085338d38..1da859aeed 100644 --- a/dvc/repo/plot/__init__.py +++ b/dvc/repo/plot/__init__.py @@ -56,7 +56,13 @@ def _evaluate_templatepath(repo, template=None): @locked def fill_template( - repo, datafile, template_path, revisions, fields=None, path=None + repo, + datafile, + template_path, + revisions, + fields=None, + path=None, + csv_header=True, ): default_plot = template_path == repo.plot_templates.default_template @@ -70,7 +76,9 @@ def fill_template( tmp_data = [] for pd in plot_datas: - rev_data_points = pd.to_datapoints(fields=fields, path=path) + rev_data_points = pd.to_datapoints( + fields=fields, path=path, csv_header=csv_header + ) if default_plot: rev_data_points = _to_default_data(rev_data_points) tmp_data.extend(rev_data_points) @@ -129,6 +137,7 @@ def plot( fields=None, path=None, embed=False, + csv_header=True, ): if revisions is None: from dvc.repo.plot.data import WORKSPACE_REVISION_NAME @@ -141,7 +150,7 @@ def plot( template_path = _evaluate_templatepath(repo, template) plot_content = fill_template( - repo, datafile, template_path, revisions, fields, path + repo, datafile, template_path, revisions, fields, path, csv_header ) if embed: diff --git a/dvc/repo/plot/data.py b/dvc/repo/plot/data.py index 3b3b3427bf..914001959e 100644 --- a/dvc/repo/plot/data.py +++ b/dvc/repo/plot/data.py @@ -6,7 +6,7 @@ from collections import OrderedDict from copy import copy -from funcy import first, cached_property +from funcy import first from ruamel import yaml from dvc.exceptions import DvcException, PathMissingError @@ -151,15 +151,14 @@ def __init__(self, filename, revision, content, **kwargs): self.revision = revision self.content = content - @property - def raw(self): + def raw(self, **kwargs): raise NotImplementedError def _processors(self): return [_filter_fields] def to_datapoints(self, **kwargs): - data = self.raw + data = self.raw(**kwargs) for data_proc in self._processors(): data = data_proc( @@ -172,8 +171,7 @@ def to_datapoints(self, **kwargs): class JSONPlotData(PlotData): - @cached_property - def raw(self): + def raw(self, **kwargs): return json.loads(self.content, object_pairs_hook=OrderedDict) def _processors(self): @@ -186,21 +184,18 @@ def __init__(self, filename, revision, content, delimiter=","): super(CSVPlotData, self).__init__(filename, revision, content) self.delimiter = delimiter - @cached_property - def raw(self): + def raw(self, csv_header=True, **kwargs): first_row = first(csv.reader(io.StringIO(self.content))) - if len(first_row) == 1: + if csv_header: reader = csv.DictReader( - io.StringIO(self.content), - delimiter=self.delimiter, - fieldnames=["value"], + io.StringIO(self.content), delimiter=self.delimiter, ) else: reader = csv.DictReader( io.StringIO(self.content), - skipinitialspace=True, delimiter=self.delimiter, + fieldnames=[str(i) for i in range(len(first_row))], ) fieldnames = reader.fieldnames @@ -213,8 +208,7 @@ def raw(self): class YAMLPLotData(PlotData): - @cached_property - def raw(self): + def raw(self, **kwargs): return yaml.parse(io.StringIO(self.content)) diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index cbc1d4126f..c1f940f62d 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -57,12 +57,12 @@ def test_plot_csv_one_column(tmp_dir, scm, dvc): _write_csv(metric, "metric.csv") _run_with_metric(tmp_dir, metric_filename="metric.csv") - plot_string = dvc.plot("metric.csv") + plot_string = dvc.plot("metric.csv", csv_header=False) plot_content = json.loads(plot_string) assert plot_content["data"]["values"] == [ - {"value": "2", "x": 0, "rev": "workspace"}, - {"value": "3", "x": 1, "rev": "workspace"}, + {"0": "2", "x": 0, "rev": "workspace"}, + {"0": "3", "x": 1, "rev": "workspace"}, ] diff --git a/tests/unit/command/test_plot.py b/tests/unit/command/test_plot.py index 2482560434..6bc82cedbb 100644 --- a/tests/unit/command/test_plot.py +++ b/tests/unit/command/test_plot.py @@ -41,6 +41,7 @@ def test_metrics_diff(mocker): fields={"column1", "column2"}, path=None, embed=False, + csv_header=True, ) @@ -56,8 +57,9 @@ def test_metrics_show(mocker): "-f", "$.data", "--no-html", - "datafile", "--stdout", + "--no-csv-header", + "datafile", ] ) assert cli_args.func == CmdPlotShow @@ -77,6 +79,7 @@ def test_metrics_show(mocker): fields=None, path="$.data", embed=False, + csv_header=False, ) From 010dd6462aba39ffb3823455e1715a1adc600455 Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 24 Apr 2020 16:15:19 +0200 Subject: [PATCH 083/102] plot: improve error message for wrongly structured metric --- dvc/repo/plot/data.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dvc/repo/plot/data.py b/dvc/repo/plot/data.py index 914001959e..b4019481b5 100644 --- a/dvc/repo/plot/data.py +++ b/dvc/repo/plot/data.py @@ -27,7 +27,10 @@ def __init__(self, file): class PlotDataStructureError(DvcException): def __init__(self): - super().__init__("Plot data extraction failed.") + super().__init__( + "Plot data extraction failed. Please see " + "documentation for supported data formats." + ) class JsonParsingError(DvcException): From 58da963c3139c2c8a223f6f8343b155654fa2dbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Mon, 27 Apr 2020 15:02:27 +0200 Subject: [PATCH 084/102] plot: match template name exactly, whit suffix appended only --- dvc/template.py | 19 +++++++++++++------ tests/func/test_plot.py | 2 +- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/dvc/template.py b/dvc/template.py index ad4c4b54d8..29d613b9ff 100644 --- a/dvc/template.py +++ b/dvc/template.py @@ -167,12 +167,19 @@ def get_template(self, path): if os.path.exists(t_path): return t_path - regex = re.compile(re.escape(t_path) + ".*") - for root, _, files in os.walk(self.templates_dir): - for file in files: - full_file = os.path.join(root, file) - if regex.findall(full_file): - return full_file + all_templates = [ + os.path.join(root, file) + for root, _, files in os.walk(self.templates_dir) + for file in files + ] + matches = [ + template + for template in all_templates + if os.path.splitext(template)[0] == t_path + ] + if matches: + assert len(matches) == 1 + return matches[0] raise TemplateNotFound(path) diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index c1f940f62d..c1891843da 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -122,7 +122,7 @@ def test_plot_confusion(tmp_dir, dvc): _write_json(tmp_dir, confusion_matrix, "metric.json") _run_with_metric(tmp_dir, "metric.json", "first run") - plot_string = dvc.plot(datafile="metric.json", template="confusion") + plot_string = dvc.plot(datafile="metric.json", template="confusion_matrix") plot_content = json.loads(plot_string) assert plot_content["data"]["values"] == [ From 241466d4fb49518cbf8f5a5f7fc169e805e36054 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Mon, 27 Apr 2020 17:35:11 +0200 Subject: [PATCH 085/102] plot: dmpetrov and ivan review --- dvc/command/plot.py | 21 +++++++++------------ dvc/repo/__init__.py | 9 ++++++--- dvc/repo/plot/__init__.py | 23 +---------------------- dvc/repo/plot/data.py | 9 +++------ dvc/template.py | 5 ++++- tests/func/test_plot.py | 5 +---- 6 files changed, 24 insertions(+), 48 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 1b02f3cc6f..bd78bae8d2 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -5,7 +5,6 @@ from dvc.command.base import append_doc_link, CmdBase, fix_subparsers from dvc.exceptions import DvcException from dvc.repo.plot.data import WORKSPACE_REVISION_NAME -from dvc.utils import format_link logger = logging.getLogger(__name__) @@ -97,8 +96,8 @@ def _revisions(self): def add_parser(subparsers, parent_parser): PLOT_HELP = ( - "For visualisation of metrics stored in structured files (" - "json, csv, tsv)." + "Generating plots for metrics stored in structured files " + "(json, csv, tsv)." ) plot_parser = subparsers.add_parser( @@ -115,9 +114,7 @@ def add_parser(subparsers, parent_parser): fix_subparsers(plot_subparsers) - SHOW_HELP = "Visualize target dvct file using {}.".format( - format_link("https://vega.github.io") - ) + SHOW_HELP = "Plot data from a file." plot_show_parser = plot_subparsers.add_parser( "show", parents=[parent_parser], @@ -155,19 +152,19 @@ def add_parser(subparsers, parent_parser): "--stdout", action="store_true", default=False, - help="Print plot content to stdout", + help="Print output to stdout.", ) plot_show_parser.add_argument( "--no-csv-header", action="store_true", default=False, - help="Provided csv datafile does not have a header.", + help="Provided CSV ot TSV datafile does not have a header.", ) plot_show_parser.set_defaults(func=CmdPlotShow) PLOT_DIFF_HELP = ( - "Plot changes in metrics between commits" - " in the DVC repository, or between a commit and the workspace." + "Plot changes between commits in the DVC repository, " + "or between the last commit and the workspace." ) plot_diff_parser = plot_subparsers.add_parser( "diff", @@ -217,12 +214,12 @@ def add_parser(subparsers, parent_parser): "--stdout", action="store_true", default=False, - help="Print plot content to stdout", + help="Print output to stdout.", ) plot_diff_parser.add_argument( "--no-csv-header", action="store_true", default=False, - help="Provided csv datafile does not have a header.", + help="Provided CSV ot TSV datafile does not have a header.", ) plot_diff_parser.set_defaults(func=CmdPlotDiff) diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index f4740d3b6f..69f708f01a 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -16,7 +16,6 @@ OutputNotFoundError, ) from dvc.path_info import PathInfo -from dvc.template import PlotTemplates from dvc.remote.base import RemoteActionNotImplemented from dvc.utils.fs import path_isin from .graph import check_acyclic, get_pipeline, get_pipelines @@ -114,8 +113,6 @@ def __init__(self, root_dir=None): self._ignore() - self.plot_templates = PlotTemplates(self.dvc_dir) - @property def tree(self): return self._tree @@ -430,6 +427,12 @@ def stages(self): """ return self._collect_stages() + @cached_property + def plot_templates(self): + from dvc.template import PlotTemplates + + return PlotTemplates(self.dvc_dir) + def _collect_stages(self): from dvc.dvcfile import Dvcfile, is_valid_filename diff --git a/dvc/repo/plot/__init__.py b/dvc/repo/plot/__init__.py index 1da859aeed..63b28f2421 100644 --- a/dvc/repo/plot/__init__.py +++ b/dvc/repo/plot/__init__.py @@ -13,7 +13,7 @@ PAGE_HTML = """ - dvc plot + DVC plot @@ -160,27 +160,6 @@ def plot( return plot_content -def infer_result_file(datafile, template_path, embed): - if datafile: - tmp = datafile - else: - tmp = "plot" - - if not embed: - extension = os.path.splitext(template_path)[1] - else: - extension = ".html" - - result_file = os.path.splitext(tmp)[0] + extension - - if result_file == datafile or result_file == template_path: - raise DvcException( - "Cannot create '{}': file already exists, use -r to redefine " - "it".format(result_file) - ) - return result_file - - def _parse_template(template_path, datafile): template_datafiles = Template.parse_data_placeholders(template_path) if datafile: diff --git a/dvc/repo/plot/data.py b/dvc/repo/plot/data.py index b4019481b5..18ed8d6406 100644 --- a/dvc/repo/plot/data.py +++ b/dvc/repo/plot/data.py @@ -51,11 +51,8 @@ def __init__(self, path, revision): class NoMetricInHistoryError(DvcException): - def __init__(self, path, revisions): - super().__init__( - "Could not find '{}' on any of the revisions " - "'{}'".format(path, ", ".join(revisions)) - ) + def __init__(self, path): + super().__init__("Could not find '{}'.".format(path)) def plot_data(filename, revision, content): @@ -254,7 +251,7 @@ def _load_from_revisions(repo, datafile, revisions): raise if not data and exceptions: - raise NoMetricInHistoryError(datafile, revisions) + raise NoMetricInHistoryError(datafile) else: for e in exceptions: logger.warning( diff --git a/dvc/template.py b/dvc/template.py index 29d613b9ff..109ca4723f 100644 --- a/dvc/template.py +++ b/dvc/template.py @@ -160,7 +160,10 @@ def templates_dir(self): @cached_property def default_template(self): - return os.path.join(self.templates_dir, "default.json") + default_plot_path = os.path.join(self.templates_dir, "default.json") + if not os.path.exists(default_plot_path): + raise TemplateNotFound(os.path.relpath(default_plot_path)) + return default_plot_path def get_template(self, path): t_path = os.path.join(self.templates_dir, path) diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index c1891843da..980c8434c0 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -231,10 +231,7 @@ def test_throw_on_no_metric_at_all(tmp_dir, scm, dvc, caplog): # do not warn if none found assert len(caplog.messages) == 0 - assert ( - str(error.value) - == "Could not find 'metric.json' on any of the revisions 'v1'" - ) + assert str(error.value) == "Could not find 'metric.json'." @pytest.fixture() From feb702994e8f64738dc4ee097031a7ac546a6903 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Tue, 28 Apr 2020 11:45:10 +0200 Subject: [PATCH 086/102] plot: refactor --stdout help message --- dvc/command/plot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index bd78bae8d2..65652a234a 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -152,7 +152,7 @@ def add_parser(subparsers, parent_parser): "--stdout", action="store_true", default=False, - help="Print output to stdout.", + help="Print result to stdout.", ) plot_show_parser.add_argument( "--no-csv-header", @@ -214,7 +214,7 @@ def add_parser(subparsers, parent_parser): "--stdout", action="store_true", default=False, - help="Print output to stdout.", + help="Print result to stdout.", ) plot_diff_parser.add_argument( "--no-csv-header", From d8e508a505b399e6a6895b20aaa1238c5cc659e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Tue, 28 Apr 2020 17:53:31 +0200 Subject: [PATCH 087/102] plot: move template to repo/plot --- dvc/command/plot.py | 14 ++++++++++++++ dvc/repo/__init__.py | 2 +- dvc/repo/plot/__init__.py | 16 ++++++++++++++-- dvc/{ => repo/plot}/template.py | 0 tests/func/test_plot.py | 2 +- 5 files changed, 30 insertions(+), 4 deletions(-) rename dvc/{ => repo/plot}/template.py (100%) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 65652a234a..44e662fc81 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -54,6 +54,8 @@ def run(self): template=self.args.template, revisions=self._revisions(), fields=fields, + x_field=self.args.x, + y_field=self.args.y, path=jsonpath, embed=not self.args.no_html, csv_header=not self.args.no_csv_header, @@ -147,6 +149,12 @@ def add_parser(subparsers, parent_parser): default=None, help="Choose which fileds or jsonpath to put into plot.", ) + plot_show_parser.add_argument( + "-x", default=None, help="Field name that will be on x axis of plot." + ) + plot_show_parser.add_argument( + "-y", default=None, help="Field name that will be on y axis of plot." + ) plot_show_parser.add_argument( "-o", "--stdout", @@ -209,6 +217,12 @@ def add_parser(subparsers, parent_parser): default=None, help="Choose which filed(s) or jsonpath to put into plot.", ) + plot_diff_parser.add_argument( + "-x", default=None, help="Field name that will be on x axis of plot." + ) + plot_diff_parser.add_argument( + "-y", default=None, help="Field name that will be on y axis of plot." + ) plot_diff_parser.add_argument( "-o", "--stdout", diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index 69f708f01a..b9c38f575c 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -429,7 +429,7 @@ def stages(self): @cached_property def plot_templates(self): - from dvc.template import PlotTemplates + from dvc.repo.plot.template import PlotTemplates return PlotTemplates(self.dvc_dir) diff --git a/dvc/repo/plot/__init__.py b/dvc/repo/plot/__init__.py index 63b28f2421..9e80ef9e98 100644 --- a/dvc/repo/plot/__init__.py +++ b/dvc/repo/plot/__init__.py @@ -6,7 +6,7 @@ from dvc.exceptions import DvcException from dvc.repo.plot.data import PlotData -from dvc.template import Template, NoDataForTemplateError +from dvc.repo.plot.template import Template, NoDataForTemplateError from dvc.repo import locked logger = logging.getLogger(__name__) @@ -61,6 +61,8 @@ def fill_template( template_path, revisions, fields=None, + x_field=None, + y_field=None, path=None, csv_header=True, ): @@ -135,6 +137,8 @@ def plot( template=None, revisions=None, fields=None, + x_field=None, + y_field=None, path=None, embed=False, csv_header=True, @@ -150,7 +154,15 @@ def plot( template_path = _evaluate_templatepath(repo, template) plot_content = fill_template( - repo, datafile, template_path, revisions, fields, path, csv_header + repo, + datafile, + template_path, + revisions, + fields, + x_field, + y_field, + path, + csv_header, ) if embed: diff --git a/dvc/template.py b/dvc/repo/plot/template.py similarity index 100% rename from dvc/template.py rename to dvc/repo/plot/template.py diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 980c8434c0..56d9b7ca96 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -10,7 +10,7 @@ from dvc.compat import fspath from dvc.repo.plot.data import NoMetricInHistoryError, PlotMetricTypeError -from dvc.template import ( +from dvc.repo.plot.template import ( DefaultLinearTemplate, TemplateNotFound, NoDataForTemplateError, From 3bbbb6e1a02bad0c4c7c50f6e53152df9f3e0ca8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Wed, 29 Apr 2020 16:47:45 +0200 Subject: [PATCH 088/102] plot: add -x and -y options --- dvc/command/plot.py | 11 +- dvc/repo/plot/__init__.py | 86 ++++++-------- dvc/repo/plot/data.py | 28 ++++- dvc/repo/plot/template.py | 37 ++++-- tests/func/test_plot.py | 203 ++++++++++++++++++++++---------- tests/unit/command/test_plot.py | 8 ++ 6 files changed, 241 insertions(+), 132 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 44e662fc81..45d02da98a 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -132,7 +132,10 @@ def add_parser(subparsers, parent_parser): help="File to be injected with data.", ) plot_show_parser.add_argument( - "datafile", nargs="?", default=None, help="Data to be visualized." + "datafile", + nargs="?", + default=None, + help="Continuous metrics file to visualize.", ) plot_show_parser.add_argument( "-r", "--result", help="Name of the generated file." @@ -171,8 +174,8 @@ def add_parser(subparsers, parent_parser): plot_show_parser.set_defaults(func=CmdPlotShow) PLOT_DIFF_HELP = ( - "Plot changes between commits in the DVC repository, " - "or between the last commit and the workspace." + "Plot continuous metrics differences between commits in the DVC " + "repository, or between the last commit and the workspace." ) plot_diff_parser = plot_subparsers.add_parser( "diff", @@ -193,7 +196,7 @@ def add_parser(subparsers, parent_parser): "--datafile", nargs="?", default=None, - help="Data to be visualized.", + help="Continuous metrics file to visualize.", ) plot_diff_parser.add_argument( "-r", "--result", help="Name of the generated file." diff --git a/dvc/repo/plot/__init__.py b/dvc/repo/plot/__init__.py index 9e80ef9e98..5dd360cc60 100644 --- a/dvc/repo/plot/__init__.py +++ b/dvc/repo/plot/__init__.py @@ -1,4 +1,3 @@ -import json import logging import os @@ -66,23 +65,34 @@ def fill_template( path=None, csv_header=True, ): - default_plot = template_path == repo.plot_templates.default_template - template_datafiles = _parse_template(template_path, datafile) + template_datafiles, x_anchor, y_anchor = _parse_template( + template_path, datafile + ) + append_index = x_anchor and not x_field + if append_index: + x_field = PlotData.INDEX_FIELD template_data = {} for datafile in template_datafiles: from dvc.repo.plot.data import _load_from_revisions plot_datas = _load_from_revisions(repo, datafile, revisions) - tmp_data = [] for pd in plot_datas: rev_data_points = pd.to_datapoints( - fields=fields, path=path, csv_header=csv_header + fields=fields, + path=path, + csv_header=csv_header, + append_index=append_index, ) - if default_plot: - rev_data_points = _to_default_data(rev_data_points) + + if y_anchor and not y_field: + all_fields = list(first(rev_data_points).keys()) + all_fields.remove(PlotData.REVISION_FIELD) + if x_field and x_field in all_fields: + all_fields.remove(x_field) + y_field = last(all_fields) tmp_data.extend(rev_data_points) template_data[datafile] = tmp_data @@ -90,44 +100,8 @@ def fill_template( if len(template_data) == 0: raise NoDataForTemplateError(template_path) - filled_template = Template.fill(template_path, template_data, datafile) - - if default_plot: - return _fix_default_template(template_data, filled_template) - - return filled_template - - -def _to_default_data(data_points): - keys = list(first(data_points).keys()) - keys.remove(PlotData.REVISION_FIELD) - data_field = last(keys) - new_data = [] - for index, data_point in enumerate(data_points): - new_data.append( - { - "x": index, - data_field: data_point[data_field], - PlotData.REVISION_FIELD: data_point[PlotData.REVISION_FIELD], - } - ) - return new_data - - -def _fix_default_template(template_data, plot_json): - assert len(template_data) == 1 - datafile, data = first(template_data.items()) - - keys = list(first(data).keys()) - keys.remove(PlotData.REVISION_FIELD) - keys.remove("x") - data_field = first(keys) - - tmp_plot = json.loads(plot_json) - tmp_plot["title"] = datafile - tmp_plot["encoding"]["y"]["field"] = data_field - return json.dumps( - tmp_plot, indent=4, separators=(",", ": "), sort_keys=True + return Template.fill( + template_path, template_data, datafile, x_field, y_field ) @@ -172,10 +146,20 @@ def plot( return plot_content -def _parse_template(template_path, datafile): - template_datafiles = Template.parse_data_placeholders(template_path) - if datafile: +def _parse_template(template_path, priority_datafile): + with open(template_path, "r") as fobj: + tempalte_content = fobj.read() + + template_datafiles = Template.parse_data_placeholders(tempalte_content) + if priority_datafile: if len(template_datafiles) > 1: - raise TooManyDataSourcesError(datafile, template_datafiles) - template_datafiles = {datafile} - return template_datafiles + raise TooManyDataSourcesError( + priority_datafile, template_datafiles + ) + template_datafiles = {priority_datafile} + + return ( + template_datafiles, + Template.X_AXIS_STRING in tempalte_content, + Template.Y_AXIS_STRING in tempalte_content, + ) diff --git a/dvc/repo/plot/data.py b/dvc/repo/plot/data.py index 18ed8d6406..a20665b66d 100644 --- a/dvc/repo/plot/data.py +++ b/dvc/repo/plot/data.py @@ -143,8 +143,30 @@ def _find_data(data, fields=None, **kwargs): raise PlotDataStructureError() +def _append_index(data_points, append_index=False, **kwargs): + if not append_index: + return data_points + + if PlotData.INDEX_FIELD in first(data_points).keys(): + raise DvcException( + "Cannot append index. Field of same name ('{}') found in data. " + "Use `-x` to specify x axis field.".format(PlotData.INDEX_FIELD) + ) + + for index, data_point in enumerate(data_points): + data_point[PlotData.INDEX_FIELD] = index + return data_points + + +def _append_revision(data_points, revision, **kwargs): + for data_point in data_points: + data_point[PlotData.REVISION_FIELD] = revision + return data_points + + class PlotData: REVISION_FIELD = "rev" + INDEX_FIELD = "index" def __init__(self, filename, revision, content, **kwargs): self.filename = filename @@ -155,7 +177,7 @@ def raw(self, **kwargs): raise NotImplementedError def _processors(self): - return [_filter_fields] + return [_filter_fields, _append_index, _append_revision] def to_datapoints(self, **kwargs): data = self.raw(**kwargs) @@ -164,9 +186,6 @@ def to_datapoints(self, **kwargs): data = data_proc( data, filename=self.filename, revision=self.revision, **kwargs ) - - for data_point in data: - data_point[self.REVISION_FIELD] = self.revision return data @@ -209,6 +228,7 @@ def raw(self, csv_header=True, **kwargs): class YAMLPLotData(PlotData): def raw(self, **kwargs): + # TODO ordered return yaml.parse(io.StringIO(self.content)) diff --git a/dvc/repo/plot/template.py b/dvc/repo/plot/template.py index 109ca4723f..3da12f62f9 100644 --- a/dvc/repo/plot/template.py +++ b/dvc/repo/plot/template.py @@ -29,6 +29,8 @@ class Template: SEPARATORS = (",", ": ") EXTENSION = ".json" METRIC_DATA_STRING = "" + X_AXIS_STRING = "" + Y_AXIS_STRING = "" def __init__(self, templates_dir): self.plot_templates_dir = templates_dir @@ -66,17 +68,15 @@ def load_template(self, path): raise DvcException("Not in repo nor in defaults") @staticmethod - def get_data_placeholders(template_path): + def get_data_placeholders(template_content): regex = re.compile('""]*>"') - with open(template_path, "r") as fobj: - template_content = fobj.read() return regex.findall(template_content) @staticmethod - def parse_data_placeholders(template_path): + def parse_data_placeholders(template_content): data_files = { Template.get_datafile(m) - for m in Template.get_data_placeholders(template_path) + for m in Template.get_data_placeholders(template_content) } return {df for df in data_files if df} @@ -91,11 +91,13 @@ def get_datafile(placeholder_string): ) @staticmethod - def fill(template_path, data, priority_datafile=None): + def fill( + template_path, data, priority_datafile=None, x_field=None, y_field=None + ): with open(template_path, "r") as fobj: result_content = fobj.read() - for placeholder in Template.get_data_placeholders(template_path): + for placeholder in Template.get_data_placeholders(result_content): file = Template.get_datafile(placeholder) if not file or priority_datafile: @@ -113,6 +115,15 @@ def fill(template_path, data, priority_datafile=None): ), ) + if Template.X_AXIS_STRING in result_content and x_field: + result_content = result_content.replace( + Template.X_AXIS_STRING, x_field + ) + if Template.Y_AXIS_STRING in result_content and y_field: + result_content = result_content.replace( + Template.Y_AXIS_STRING, y_field + ) + return result_content @@ -124,8 +135,8 @@ class DefaultLinearTemplate(Template): "data": {"values": Template.METRIC_DATA_STRING}, "mark": {"type": "line"}, "encoding": { - "x": {"field": "x", "type": "quantitative"}, - "y": {"field": "y", "type": "quantitative"}, + "x": {"field": Template.X_AXIS_STRING, "type": "quantitative"}, + "y": {"field": Template.Y_AXIS_STRING, "type": "quantitative"}, "color": {"field": "rev", "type": "nominal"}, }, } @@ -139,11 +150,15 @@ class DefaultConfusionTemplate(Template): "mark": "rect", "encoding": { "x": { - "field": "predicted", + "field": Template.X_AXIS_STRING, + "type": "nominal", + "sort": "ascending", + }, + "y": { + "field": Template.Y_AXIS_STRING, "type": "nominal", "sort": "ascending", }, - "y": {"field": "actual", "type": "nominal", "sort": "ascending"}, "color": {"aggregate": "count", "type": "quantitative"}, "facet": {"field": "rev", "type": "nominal"}, }, diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 56d9b7ca96..bc5231bed5 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -9,9 +9,12 @@ from funcy import first from dvc.compat import fspath -from dvc.repo.plot.data import NoMetricInHistoryError, PlotMetricTypeError +from dvc.repo.plot.data import ( + NoMetricInHistoryError, + PlotMetricTypeError, + PlotData, +) from dvc.repo.plot.template import ( - DefaultLinearTemplate, TemplateNotFound, NoDataForTemplateError, ) @@ -32,9 +35,9 @@ def _run_with_metric(tmp_dir, metric_filename, commit=None, tag=None): tmp_dir.dvc.scm.tag(tag) -def _write_csv(metric, filename): +def _write_csv(metric, filename, header=True): with open(filename, "w", newline="") as csvobj: - if all([len(e) > 1 for e in metric]): + if header: writer = csv.DictWriter( csvobj, fieldnames=list(first(metric).keys()) ) @@ -54,16 +57,18 @@ def _write_json(tmp_dir, metric, filename): def test_plot_csv_one_column(tmp_dir, scm, dvc): # for single column write with no header, hence `value` in result metric = [{"val": 2}, {"val": 3}] - _write_csv(metric, "metric.csv") + _write_csv(metric, "metric.csv", header=False) _run_with_metric(tmp_dir, metric_filename="metric.csv") plot_string = dvc.plot("metric.csv", csv_header=False) plot_content = json.loads(plot_string) assert plot_content["data"]["values"] == [ - {"0": "2", "x": 0, "rev": "workspace"}, - {"0": "3", "x": 1, "rev": "workspace"}, + {"0": "2", PlotData.INDEX_FIELD: 0, "rev": "workspace"}, + {"0": "3", PlotData.INDEX_FIELD: 1, "rev": "workspace"}, ] + assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD + assert plot_content["encoding"]["y"]["field"] == "0" def test_plot_csv_multiple_columns(tmp_dir, scm, dvc): @@ -78,9 +83,54 @@ def test_plot_csv_multiple_columns(tmp_dir, scm, dvc): plot_content = json.loads(plot_string) assert plot_content["data"]["values"] == [ - {"val": "2", "x": 0, "rev": "workspace"}, - {"val": "3", "x": 1, "rev": "workspace"}, + { + "val": "2", + PlotData.INDEX_FIELD: 0, + "rev": "workspace", + "first_val": "100", + "second_val": "100", + }, + { + "val": "3", + PlotData.INDEX_FIELD: 1, + "rev": "workspace", + "first_val": "200", + "second_val": "300", + }, + ] + assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD + assert plot_content["encoding"]["y"]["field"] == "val" + + +def test_plot_csv_choose_axes(tmp_dir, scm, dvc): + metric = [ + OrderedDict([("first_val", 100), ("second_val", 100), ("val", 2)]), + OrderedDict([("first_val", 200), ("second_val", 300), ("val", 3)]), + ] + _write_csv(metric, "metric.csv") + _run_with_metric(tmp_dir, metric_filename="metric.csv") + + plot_string = dvc.plot( + "metric.csv", x_field="first_val", y_field="second_val" + ) + + plot_content = json.loads(plot_string) + assert plot_content["data"]["values"] == [ + { + "val": "2", + "rev": "workspace", + "first_val": "100", + "second_val": "100", + }, + { + "val": "3", + "rev": "workspace", + "first_val": "200", + "second_val": "300", + }, ] + assert plot_content["encoding"]["x"]["field"] == "first_val" + assert plot_content["encoding"]["y"]["field"] == "second_val" def test_plot_json_single_val(tmp_dir, scm, dvc): @@ -92,9 +142,11 @@ def test_plot_json_single_val(tmp_dir, scm, dvc): plot_json = json.loads(plot_string) assert plot_json["data"]["values"] == [ - {"val": 2, "x": 0, "rev": "workspace"}, - {"val": 3, "x": 1, "rev": "workspace"}, + {"val": 2, PlotData.INDEX_FIELD: 0, "rev": "workspace"}, + {"val": 3, PlotData.INDEX_FIELD: 1, "rev": "workspace"}, ] + assert plot_json["encoding"]["x"]["field"] == PlotData.INDEX_FIELD + assert plot_json["encoding"]["y"]["field"] == "val" def test_plot_json_multiple_val(tmp_dir, scm, dvc): @@ -109,9 +161,21 @@ def test_plot_json_multiple_val(tmp_dir, scm, dvc): plot_content = json.loads(plot_string) assert plot_content["data"]["values"] == [ - {"val": 2, "x": 0, "rev": "workspace"}, - {"val": 3, "x": 1, "rev": "workspace"}, + { + "val": 2, + PlotData.INDEX_FIELD: 0, + "first_val": 100, + "rev": "workspace", + }, + { + "val": 3, + PlotData.INDEX_FIELD: 1, + "first_val": 200, + "rev": "workspace", + }, ] + assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD + assert plot_content["encoding"]["y"]["field"] == "val" def test_plot_confusion(tmp_dir, dvc): @@ -122,25 +186,32 @@ def test_plot_confusion(tmp_dir, dvc): _write_json(tmp_dir, confusion_matrix, "metric.json") _run_with_metric(tmp_dir, "metric.json", "first run") - plot_string = dvc.plot(datafile="metric.json", template="confusion_matrix") + plot_string = dvc.plot( + datafile="metric.json", + template="confusion_matrix", + x_field="predicted", + y_field="actual", + ) plot_content = json.loads(plot_string) assert plot_content["data"]["values"] == [ {"predicted": "B", "actual": "A", "rev": "workspace"}, {"predicted": "A", "actual": "A", "rev": "workspace"}, ] + assert plot_content["encoding"]["x"]["field"] == "predicted" + assert plot_content["encoding"]["y"]["field"] == "actual" def test_plot_multiple_revs_default(tmp_dir, scm, dvc): - metric_1 = [{"x": 1, "y": 2}, {"x": 2, "y": 3}] + metric_1 = [{"y": 2}, {"y": 3}] _write_json(tmp_dir, metric_1, "metric.json") _run_with_metric(tmp_dir, "metric.json", "init", "v1") - metric_2 = [{"x": 1, "y": 3}, {"x": 2, "y": 5}] + metric_2 = [{"y": 3}, {"y": 5}] _write_json(tmp_dir, metric_2, "metric.json") _run_with_metric(tmp_dir, "metric.json", "second", "v2") - metric_3 = [{"x": 1, "y": 5}, {"x": 2, "y": 6}] + metric_3 = [{"y": 5}, {"y": 6}] _write_json(tmp_dir, metric_3, "metric.json") _run_with_metric(tmp_dir, "metric.json", "third") @@ -150,13 +221,15 @@ def test_plot_multiple_revs_default(tmp_dir, scm, dvc): plot_content = json.loads(plot_string) assert plot_content["data"]["values"] == [ - {"y": 5, "x": 0, "rev": "HEAD"}, - {"y": 6, "x": 1, "rev": "HEAD"}, - {"y": 3, "x": 0, "rev": "v2"}, - {"y": 5, "x": 1, "rev": "v2"}, - {"y": 2, "x": 0, "rev": "v1"}, - {"y": 3, "x": 1, "rev": "v1"}, + {"y": 5, PlotData.INDEX_FIELD: 0, "rev": "HEAD"}, + {"y": 6, PlotData.INDEX_FIELD: 1, "rev": "HEAD"}, + {"y": 3, PlotData.INDEX_FIELD: 0, "rev": "v2"}, + {"y": 5, PlotData.INDEX_FIELD: 1, "rev": "v2"}, + {"y": 2, PlotData.INDEX_FIELD: 0, "rev": "v1"}, + {"y": 3, PlotData.INDEX_FIELD: 1, "rev": "v1"}, ] + assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD + assert plot_content["encoding"]["y"]["field"] == "y" def test_plot_multiple_revs(tmp_dir, scm, dvc): @@ -164,15 +237,15 @@ def test_plot_multiple_revs(tmp_dir, scm, dvc): fspath(tmp_dir / ".dvc" / "plot" / "default.json"), "template.json" ) - metric_1 = [{"x": 1, "y": 2}, {"x": 2, "y": 3}] + metric_1 = [{"y": 2}, {"y": 3}] _write_json(tmp_dir, metric_1, "metric.json") _run_with_metric(tmp_dir, "metric.json", "init", "v1") - metric_2 = [{"x": 1, "y": 3}, {"x": 2, "y": 5}] + metric_2 = [{"y": 3}, {"y": 5}] _write_json(tmp_dir, metric_2, "metric.json") _run_with_metric(tmp_dir, "metric.json", "second", "v2") - metric_3 = [{"x": 1, "y": 5}, {"x": 2, "y": 6}] + metric_3 = [{"y": 5}, {"y": 6}] _write_json(tmp_dir, metric_3, "metric.json") _run_with_metric(tmp_dir, "metric.json", "third") @@ -184,13 +257,15 @@ def test_plot_multiple_revs(tmp_dir, scm, dvc): plot_content = json.loads(plot_string) assert plot_content["data"]["values"] == [ - {"y": 5, "x": 1, "rev": "HEAD"}, - {"y": 6, "x": 2, "rev": "HEAD"}, - {"y": 3, "x": 1, "rev": "v2"}, - {"y": 5, "x": 2, "rev": "v2"}, - {"y": 2, "x": 1, "rev": "v1"}, - {"y": 3, "x": 2, "rev": "v1"}, + {"y": 5, PlotData.INDEX_FIELD: 0, "rev": "HEAD"}, + {"y": 6, PlotData.INDEX_FIELD: 1, "rev": "HEAD"}, + {"y": 3, PlotData.INDEX_FIELD: 0, "rev": "v2"}, + {"y": 5, PlotData.INDEX_FIELD: 1, "rev": "v2"}, + {"y": 2, PlotData.INDEX_FIELD: 0, "rev": "v1"}, + {"y": 3, PlotData.INDEX_FIELD: 1, "rev": "v1"}, ] + assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD + assert plot_content["encoding"]["y"]["field"] == "y" def test_plot_even_if_metric_missing(tmp_dir, scm, dvc, caplog): @@ -211,9 +286,11 @@ def test_plot_even_if_metric_missing(tmp_dir, scm, dvc, caplog): plot_content = json.loads(plot_string) assert plot_content["data"]["values"] == [ - {"y": 2, "x": 0, "rev": "v2"}, - {"y": 3, "x": 1, "rev": "v2"}, + {"y": 2, PlotData.INDEX_FIELD: 0, "rev": "v2"}, + {"y": 3, PlotData.INDEX_FIELD: 1, "rev": "v2"}, ] + assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD + assert plot_content["encoding"]["y"]["field"] == "y" def test_throw_on_no_metric_at_all(tmp_dir, scm, dvc, caplog): @@ -249,13 +326,17 @@ def test_custom_template(tmp_dir, scm, dvc, custom_template): _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "init", "v1") - plot_string = dvc.plot("metric.json", fspath(custom_template)) + plot_string = dvc.plot( + "metric.json", fspath(custom_template), x_field="a", y_field="b" + ) plot_content = json.loads(plot_string) assert plot_content["data"]["values"] == [ {"a": 1, "b": 2, "rev": "workspace"}, {"a": 2, "b": 3, "rev": "workspace"}, ] + assert plot_content["encoding"]["x"]["field"] == "a" + assert plot_content["encoding"]["y"]["field"] == "b" def _replace(path, src, dst): @@ -273,13 +354,20 @@ def test_custom_template_with_specified_data( _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "init", "v1") - plot_string = dvc.plot(datafile=None, template=fspath(custom_template)) + plot_string = dvc.plot( + datafile=None, + template=fspath(custom_template), + x_field="a", + y_field="b", + ) plot_content = json.loads(plot_string) assert plot_content["data"]["values"] == [ {"a": 1, "b": 2, "rev": "workspace"}, {"a": 2, "b": 3, "rev": "workspace"}, ] + assert plot_content["encoding"]["x"]["field"] == "a" + assert plot_content["encoding"]["y"]["field"] == "b" def test_plot_override_specified_data_source(tmp_dir, scm, dvc): @@ -298,7 +386,7 @@ def test_plot_override_specified_data_source(tmp_dir, scm, dvc): _run_with_metric(tmp_dir, "metric2.json", "init", "v1") plot_string = dvc.plot( - datafile="metric2.json", template="newtemplate.json", + datafile="metric2.json", template="newtemplate.json", x_field="a" ) plot_content = json.loads(plot_string) @@ -306,25 +394,8 @@ def test_plot_override_specified_data_source(tmp_dir, scm, dvc): {"a": 1, "b": 2, "rev": "workspace"}, {"a": 2, "b": 3, "rev": "workspace"}, ] - - -def test_should_embed_vega_json_template(tmp_dir, scm, dvc): - template = DefaultLinearTemplate.DEFAULT_CONTENT - template["data"] = {"values": ""} - - (tmp_dir / "template.json").write_text(json.dumps(template)) - - metric = [{"x": 1, "y": 2}, {"x": 2, "y": 3}] - _write_json(tmp_dir, metric, "metric.json") - _run_with_metric(tmp_dir, "metric.json", "init", "v1") - - plot_string = dvc.plot("metric.json", "template.json", embed=False) - - plot_content = json.loads(plot_string) - assert [ - {"x": 1, "y": 2, "rev": "workspace"}, - {"x": 2, "y": 3, "rev": "workspace"}, - ] == plot_content["data"]["values"] + assert plot_content["encoding"]["x"]["field"] == "a" + assert plot_content["encoding"]["y"]["field"] == "b" def test_should_raise_on_no_template_and_datafile(tmp_dir, dvc): @@ -354,7 +425,11 @@ def test_plot_choose_columns(tmp_dir, scm, dvc, custom_template): _run_with_metric(tmp_dir, "metric.json", "init", "v1") plot_string = dvc.plot( - "metric.json", fspath(custom_template), fields={"b", "c"}, + "metric.json", + fspath(custom_template), + fields={"b", "c"}, + x_field="b", + y_field="c", ) plot_content = json.loads(plot_string) @@ -362,6 +437,8 @@ def test_plot_choose_columns(tmp_dir, scm, dvc, custom_template): {"b": 2, "c": 3, "rev": "workspace"}, {"b": 3, "c": 4, "rev": "workspace"}, ] + assert plot_content["encoding"]["x"]["field"] == "b" + assert plot_content["encoding"]["y"]["field"] == "c" def test_plot_default_choose_column(tmp_dir, scm, dvc): @@ -373,9 +450,11 @@ def test_plot_default_choose_column(tmp_dir, scm, dvc): plot_content = json.loads(plot_string) assert plot_content["data"]["values"] == [ - {"x": 0, "b": 2, "rev": "workspace"}, - {"x": 1, "b": 3, "rev": "workspace"}, + {PlotData.INDEX_FIELD: 0, "b": 2, "rev": "workspace"}, + {PlotData.INDEX_FIELD: 1, "b": 3, "rev": "workspace"}, ] + assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD + assert plot_content["encoding"]["y"]["field"] == "b" def test_plot_embed(tmp_dir, scm, dvc): @@ -383,13 +462,13 @@ def test_plot_embed(tmp_dir, scm, dvc): _write_json(tmp_dir, metric, "metric.json") _run_with_metric(tmp_dir, "metric.json", "first run") - plot_string = dvc.plot("metric.json", embed=True) + plot_string = dvc.plot("metric.json", embed=True, y_field="val") page_content = BeautifulSoup(plot_string) data_dump = json.dumps( [ - {"val": 2, "x": 0, "rev": "workspace"}, - {"val": 3, "x": 1, "rev": "workspace"}, + {"val": 2, PlotData.INDEX_FIELD: 0, "rev": "workspace"}, + {"val": 3, PlotData.INDEX_FIELD: 1, "rev": "workspace"}, ], sort_keys=True, ) diff --git a/tests/unit/command/test_plot.py b/tests/unit/command/test_plot.py index 6bc82cedbb..54256f8f97 100644 --- a/tests/unit/command/test_plot.py +++ b/tests/unit/command/test_plot.py @@ -19,6 +19,10 @@ def test_metrics_diff(mocker): "column1,column2", "--no-html", "--stdout", + "-x", + "x_field", + "-y", + "y_field", "HEAD", "tag1", "tag2", @@ -41,6 +45,8 @@ def test_metrics_diff(mocker): fields={"column1", "column2"}, path=None, embed=False, + x_field="x_field", + y_field="y_field", csv_header=True, ) @@ -79,6 +85,8 @@ def test_metrics_show(mocker): fields=None, path="$.data", embed=False, + x_field=None, + y_field=None, csv_header=False, ) From a51cce21cf743c0e2bbe1134519459c7e9082a82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Wed, 29 Apr 2020 16:48:51 +0200 Subject: [PATCH 089/102] plot: add -x and -y options --- dvc/command/plot.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 45d02da98a..164dfd7a6f 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -153,10 +153,10 @@ def add_parser(subparsers, parent_parser): help="Choose which fileds or jsonpath to put into plot.", ) plot_show_parser.add_argument( - "-x", default=None, help="Field name that will be on x axis of plot." + "-x", default=None, help="Field that will be on x axis of plot." ) plot_show_parser.add_argument( - "-y", default=None, help="Field name that will be on y axis of plot." + "-y", default=None, help="Field that will be on y axis of plot." ) plot_show_parser.add_argument( "-o", @@ -221,10 +221,10 @@ def add_parser(subparsers, parent_parser): help="Choose which filed(s) or jsonpath to put into plot.", ) plot_diff_parser.add_argument( - "-x", default=None, help="Field name that will be on x axis of plot." + "-x", default=None, help="Field that will be on x axis of plot." ) plot_diff_parser.add_argument( - "-y", default=None, help="Field name that will be on y axis of plot." + "-y", default=None, help="Field that will be on y axis of plot." ) plot_diff_parser.add_argument( "-o", From fee7a2f166da12b5b13605315c08d259debd3710 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Wed, 29 Apr 2020 17:06:58 +0200 Subject: [PATCH 090/102] plot: command: order change --- dvc/command/plot.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 164dfd7a6f..1028f79885 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -140,12 +140,6 @@ def add_parser(subparsers, parent_parser): plot_show_parser.add_argument( "-r", "--result", help="Name of the generated file." ) - plot_show_parser.add_argument( - "--no-html", - action="store_true", - default=False, - help="Do not wrap vega plot json with HTML.", - ) plot_show_parser.add_argument( "-f", "--fields", @@ -171,6 +165,12 @@ def add_parser(subparsers, parent_parser): default=False, help="Provided CSV ot TSV datafile does not have a header.", ) + plot_show_parser.add_argument( + "--no-html", + action="store_true", + default=False, + help="Do not wrap vega plot json with HTML.", + ) plot_show_parser.set_defaults(func=CmdPlotShow) PLOT_DIFF_HELP = ( @@ -207,13 +207,6 @@ def add_parser(subparsers, parent_parser): default=None, help="Git revisions to plot from", ) - - plot_diff_parser.add_argument( - "--no-html", - action="store_true", - default=False, - help="Do not wrap vega plot json with HTML.", - ) plot_diff_parser.add_argument( "-f", "--fields", @@ -239,4 +232,10 @@ def add_parser(subparsers, parent_parser): default=False, help="Provided CSV ot TSV datafile does not have a header.", ) + plot_diff_parser.add_argument( + "--no-html", + action="store_true", + default=False, + help="Do not wrap vega plot json with HTML.", + ) plot_diff_parser.set_defaults(func=CmdPlotDiff) From 5f95cca6daaee215584ef3580096bddec74d06b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Wed, 29 Apr 2020 17:21:21 +0200 Subject: [PATCH 091/102] plot: scatter --- dvc/repo/plot/template.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/dvc/repo/plot/template.py b/dvc/repo/plot/template.py index 3da12f62f9..4f4a8c677a 100644 --- a/dvc/repo/plot/template.py +++ b/dvc/repo/plot/template.py @@ -165,9 +165,27 @@ class DefaultConfusionTemplate(Template): } +class DefaultScatterTemplate(Template): + TEMPLATE_NAME = "scatter" + DEFAULT_CONTENT = { + "$schema": "https://vega.github.io/schema/vega-lite/v4.json", + "data": {"values": Template.METRIC_DATA_STRING}, + "mark": "point", + "encoding": { + "x": {"field": Template.X_AXIS_STRING, "type": "quantitative"}, + "y": {"field": Template.Y_AXIS_STRING, "type": "quantitative"}, + "color": {"field": "rev", "type": "nominal"}, + }, + } + + class PlotTemplates: TEMPLATES_DIR = "plot" - TEMPLATES = [DefaultLinearTemplate, DefaultConfusionTemplate] + TEMPLATES = [ + DefaultLinearTemplate, + DefaultConfusionTemplate, + DefaultScatterTemplate, + ] @cached_property def templates_dir(self): From a39a741efcb729b976de4feebe19fb39e321bac6 Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 30 Apr 2020 12:01:08 +0200 Subject: [PATCH 092/102] plot: rename confusion matrix template, new name generation format --- dvc/command/plot.py | 27 +++++++++++--------------- dvc/repo/plot/__init__.py | 9 +++++++-- dvc/repo/plot/template.py | 34 ++++++++++++++++----------------- tests/func/test_plot.py | 2 +- tests/unit/command/test_plot.py | 2 +- 5 files changed, 37 insertions(+), 37 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 1028f79885..d3027a2738 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -21,16 +21,11 @@ def _result_file(self): base = self._result_basename() result_file = base + extension - if os.path.exists(result_file): - raise DvcException( - "Cannot create '{}': file already exists, use -r to redefine " - "it".format(result_file) - ) return result_file def _result_basename(self): if self.args.datafile: - return os.path.splitext(self.args.datafile)[0] + return self.args.datafile return "plot" def _result_extension(self): @@ -43,11 +38,11 @@ def _result_extension(self): def run(self): fields = None jsonpath = None - if self.args.fields: - if self.args.fields.startswith("$"): - jsonpath = self.args.fields + if self.args.filter: + if self.args.filter.startswith("$"): + jsonpath = self.args.filter else: - fields = set(self.args.fields.split(",")) + fields = set(self.args.filter.split(",")) try: plot_string = self.repo.plot( datafile=self.args.datafile, @@ -142,15 +137,15 @@ def add_parser(subparsers, parent_parser): ) plot_show_parser.add_argument( "-f", - "--fields", + "--filter", default=None, help="Choose which fileds or jsonpath to put into plot.", ) plot_show_parser.add_argument( - "-x", default=None, help="Field that will be on x axis of plot." + "-x", default=None, help="Field name for x axis." ) plot_show_parser.add_argument( - "-y", default=None, help="Field that will be on y axis of plot." + "-y", default=None, help="Field name for y axis." ) plot_show_parser.add_argument( "-o", @@ -209,15 +204,15 @@ def add_parser(subparsers, parent_parser): ) plot_diff_parser.add_argument( "-f", - "--fields", + "--filter", default=None, help="Choose which filed(s) or jsonpath to put into plot.", ) plot_diff_parser.add_argument( - "-x", default=None, help="Field that will be on x axis of plot." + "-x", default=None, help="Field name for x axis." ) plot_diff_parser.add_argument( - "-y", default=None, help="Field that will be on y axis of plot." + "-y", default=None, help="Field name for y axis." ) plot_diff_parser.add_argument( "-o", diff --git a/dvc/repo/plot/__init__.py b/dvc/repo/plot/__init__.py index 5dd360cc60..cf7df39b17 100644 --- a/dvc/repo/plot/__init__.py +++ b/dvc/repo/plot/__init__.py @@ -65,6 +65,11 @@ def fill_template( path=None, csv_header=True, ): + if x_field and fields: + fields.add(x_field) + + if y_field and fields: + fields.add(y_field) template_datafiles, x_anchor, y_anchor = _parse_template( template_path, datafile @@ -160,6 +165,6 @@ def _parse_template(template_path, priority_datafile): return ( template_datafiles, - Template.X_AXIS_STRING in tempalte_content, - Template.Y_AXIS_STRING in tempalte_content, + Template.X_AXIS_ANCHOR in tempalte_content, + Template.Y_AXIS_ANCHOR in tempalte_content, ) diff --git a/dvc/repo/plot/template.py b/dvc/repo/plot/template.py index 4f4a8c677a..bfd9da4dfc 100644 --- a/dvc/repo/plot/template.py +++ b/dvc/repo/plot/template.py @@ -28,9 +28,9 @@ class Template: INDENT = 4 SEPARATORS = (",", ": ") EXTENSION = ".json" - METRIC_DATA_STRING = "" - X_AXIS_STRING = "" - Y_AXIS_STRING = "" + METRIC_DATA_ANCHOR = "" + X_AXIS_ANCHOR = "" + Y_AXIS_ANCHOR = "" def __init__(self, templates_dir): self.plot_templates_dir = templates_dir @@ -115,13 +115,13 @@ def fill( ), ) - if Template.X_AXIS_STRING in result_content and x_field: + if Template.X_AXIS_ANCHOR in result_content and x_field: result_content = result_content.replace( - Template.X_AXIS_STRING, x_field + Template.X_AXIS_ANCHOR, x_field ) - if Template.Y_AXIS_STRING in result_content and y_field: + if Template.Y_AXIS_ANCHOR in result_content and y_field: result_content = result_content.replace( - Template.Y_AXIS_STRING, y_field + Template.Y_AXIS_ANCHOR, y_field ) return result_content @@ -132,30 +132,30 @@ class DefaultLinearTemplate(Template): DEFAULT_CONTENT = { "$schema": "https://vega.github.io/schema/vega-lite/v4.json", - "data": {"values": Template.METRIC_DATA_STRING}, + "data": {"values": Template.METRIC_DATA_ANCHOR}, "mark": {"type": "line"}, "encoding": { - "x": {"field": Template.X_AXIS_STRING, "type": "quantitative"}, - "y": {"field": Template.Y_AXIS_STRING, "type": "quantitative"}, + "x": {"field": Template.X_AXIS_ANCHOR, "type": "quantitative"}, + "y": {"field": Template.Y_AXIS_ANCHOR, "type": "quantitative"}, "color": {"field": "rev", "type": "nominal"}, }, } class DefaultConfusionTemplate(Template): - TEMPLATE_NAME = "confusion_matrix" + TEMPLATE_NAME = "confusion" DEFAULT_CONTENT = { "$schema": "https://vega.github.io/schema/vega-lite/v4.json", - "data": {"values": Template.METRIC_DATA_STRING}, + "data": {"values": Template.METRIC_DATA_ANCHOR}, "mark": "rect", "encoding": { "x": { - "field": Template.X_AXIS_STRING, + "field": Template.X_AXIS_ANCHOR, "type": "nominal", "sort": "ascending", }, "y": { - "field": Template.Y_AXIS_STRING, + "field": Template.Y_AXIS_ANCHOR, "type": "nominal", "sort": "ascending", }, @@ -169,11 +169,11 @@ class DefaultScatterTemplate(Template): TEMPLATE_NAME = "scatter" DEFAULT_CONTENT = { "$schema": "https://vega.github.io/schema/vega-lite/v4.json", - "data": {"values": Template.METRIC_DATA_STRING}, + "data": {"values": Template.METRIC_DATA_ANCHOR}, "mark": "point", "encoding": { - "x": {"field": Template.X_AXIS_STRING, "type": "quantitative"}, - "y": {"field": Template.Y_AXIS_STRING, "type": "quantitative"}, + "x": {"field": Template.X_AXIS_ANCHOR, "type": "quantitative"}, + "y": {"field": Template.Y_AXIS_ANCHOR, "type": "quantitative"}, "color": {"field": "rev", "type": "nominal"}, }, } diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index bc5231bed5..f219e4fdeb 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -188,7 +188,7 @@ def test_plot_confusion(tmp_dir, dvc): plot_string = dvc.plot( datafile="metric.json", - template="confusion_matrix", + template="confusion", x_field="predicted", y_field="actual", ) diff --git a/tests/unit/command/test_plot.py b/tests/unit/command/test_plot.py index 54256f8f97..e43e6ac817 100644 --- a/tests/unit/command/test_plot.py +++ b/tests/unit/command/test_plot.py @@ -15,7 +15,7 @@ def test_metrics_diff(mocker): "template", "-d", "datafile", - "--field", + "--filter", "column1,column2", "--no-html", "--stdout", From 4f37355f3fe6e087563e67cf1de3986532cbe01c Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 30 Apr 2020 14:40:38 +0200 Subject: [PATCH 093/102] plot: add title anchor --- dvc/repo/plot/template.py | 9 +++++++++ tests/func/test_plot.py | 1 + 2 files changed, 10 insertions(+) diff --git a/dvc/repo/plot/template.py b/dvc/repo/plot/template.py index bfd9da4dfc..67b77e2c4f 100644 --- a/dvc/repo/plot/template.py +++ b/dvc/repo/plot/template.py @@ -31,6 +31,7 @@ class Template: METRIC_DATA_ANCHOR = "" X_AXIS_ANCHOR = "" Y_AXIS_ANCHOR = "" + TITLE_ANCHOR = "" def __init__(self, templates_dir): self.plot_templates_dir = templates_dir @@ -105,6 +106,11 @@ def fill( else: key = file + if Template.TITLE_ANCHOR in result_content: + result_content = result_content.replace( + Template.TITLE_ANCHOR, key + ) + result_content = result_content.replace( placeholder, json.dumps( @@ -133,6 +139,7 @@ class DefaultLinearTemplate(Template): DEFAULT_CONTENT = { "$schema": "https://vega.github.io/schema/vega-lite/v4.json", "data": {"values": Template.METRIC_DATA_ANCHOR}, + "title": Template.TITLE_ANCHOR, "mark": {"type": "line"}, "encoding": { "x": {"field": Template.X_AXIS_ANCHOR, "type": "quantitative"}, @@ -147,6 +154,7 @@ class DefaultConfusionTemplate(Template): DEFAULT_CONTENT = { "$schema": "https://vega.github.io/schema/vega-lite/v4.json", "data": {"values": Template.METRIC_DATA_ANCHOR}, + "title": Template.TITLE_ANCHOR, "mark": "rect", "encoding": { "x": { @@ -170,6 +178,7 @@ class DefaultScatterTemplate(Template): DEFAULT_CONTENT = { "$schema": "https://vega.github.io/schema/vega-lite/v4.json", "data": {"values": Template.METRIC_DATA_ANCHOR}, + "title": Template.TITLE_ANCHOR, "mark": "point", "encoding": { "x": {"field": Template.X_AXIS_ANCHOR, "type": "quantitative"}, diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index f219e4fdeb..9458bac0e1 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -200,6 +200,7 @@ def test_plot_confusion(tmp_dir, dvc): ] assert plot_content["encoding"]["x"]["field"] == "predicted" assert plot_content["encoding"]["y"]["field"] == "actual" + assert plot_content["title"] == "metric.json" def test_plot_multiple_revs_default(tmp_dir, scm, dvc): From 39a166268fdd4a66c45565d3806f9480efbc2670 Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 30 Apr 2020 15:49:53 +0200 Subject: [PATCH 094/102] plot: review from jorgeorpinel --- dvc/command/plot.py | 22 +++++++++++----------- dvc/repo/plot/__init__.py | 6 +++--- dvc/repo/plot/data.py | 6 +++--- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index d3027a2738..54886e2c8f 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -93,8 +93,8 @@ def _revisions(self): def add_parser(subparsers, parent_parser): PLOT_HELP = ( - "Generating plots for metrics stored in structured files " - "(json, csv, tsv)." + "Generating plots for continuous metrics stored in structured files " + "(JSON, CSV, TSV)." ) plot_parser = subparsers.add_parser( @@ -111,7 +111,7 @@ def add_parser(subparsers, parent_parser): fix_subparsers(plot_subparsers) - SHOW_HELP = "Plot data from a file." + SHOW_HELP = "Generate a plot image file from a continuous metrics file." plot_show_parser = plot_subparsers.add_parser( "show", parents=[parent_parser], @@ -139,7 +139,7 @@ def add_parser(subparsers, parent_parser): "-f", "--filter", default=None, - help="Choose which fileds or jsonpath to put into plot.", + help="Choose which field(s) or JSONPath to include in the plot.", ) plot_show_parser.add_argument( "-x", default=None, help="Field name for x axis." @@ -152,19 +152,19 @@ def add_parser(subparsers, parent_parser): "--stdout", action="store_true", default=False, - help="Print result to stdout.", + help="Print plot specification to stdout.", ) plot_show_parser.add_argument( "--no-csv-header", action="store_true", default=False, - help="Provided CSV ot TSV datafile does not have a header.", + help="Required when CSV or TSV datafile does not have a header.", ) plot_show_parser.add_argument( "--no-html", action="store_true", default=False, - help="Do not wrap vega plot json with HTML.", + help="Do not wrap Vega plot JSON with HTML.", ) plot_show_parser.set_defaults(func=CmdPlotShow) @@ -184,7 +184,7 @@ def add_parser(subparsers, parent_parser): "--template", nargs="?", default=None, - help=("File to be injected with data."), + help="File to be injected with data.", ) plot_diff_parser.add_argument( "-d", @@ -206,7 +206,7 @@ def add_parser(subparsers, parent_parser): "-f", "--filter", default=None, - help="Choose which filed(s) or jsonpath to put into plot.", + help="Choose which field(s) or JSONPath to include in the plot.", ) plot_diff_parser.add_argument( "-x", default=None, help="Field name for x axis." @@ -219,7 +219,7 @@ def add_parser(subparsers, parent_parser): "--stdout", action="store_true", default=False, - help="Print result to stdout.", + help="Print plot specification to stdout.", ) plot_diff_parser.add_argument( "--no-csv-header", @@ -231,6 +231,6 @@ def add_parser(subparsers, parent_parser): "--no-html", action="store_true", default=False, - help="Do not wrap vega plot json with HTML.", + help="Do not wrap Vega plot JSON with HTML.", ) plot_diff_parser.set_defaults(func=CmdPlotDiff) diff --git a/dvc/repo/plot/__init__.py b/dvc/repo/plot/__init__.py index cf7df39b17..65e3a25aac 100644 --- a/dvc/repo/plot/__init__.py +++ b/dvc/repo/plot/__init__.py @@ -12,7 +12,7 @@ PAGE_HTML = """ - DVC plot + DVC Plot @@ -32,8 +32,8 @@ class TooManyDataSourcesError(DvcException): def __init__(self, datafile, template_datafiles): super().__init__( - "Unable to reason which of possible data sources: '{}' " - "should be replaced with '{}'".format( + "Unable to infer which of possible data sources: '{}' " + "should be replaced with '{}'.".format( ", ".join(template_datafiles), datafile ) ) diff --git a/dvc/repo/plot/data.py b/dvc/repo/plot/data.py index a20665b66d..5eafc91b7a 100644 --- a/dvc/repo/plot/data.py +++ b/dvc/repo/plot/data.py @@ -21,7 +21,7 @@ class PlotMetricTypeError(DvcException): def __init__(self, file): super().__init__( "'{}' - file type error\n" - "Only json, yaml, csv and tsv types are supported.".format(file) + "Only JSON, YAML, CSV and TSV formats are supported.".format(file) ) @@ -37,7 +37,7 @@ class JsonParsingError(DvcException): def __init__(self, file): super().__init__( "Failed to infer data structure from '{}'. Did you forget " - "to specify jsonpath?".format(file) + "to specify JSONpath?".format(file) ) @@ -46,7 +46,7 @@ def __init__(self, path, revision): self.path = path self.revision = revision super().__init__( - "Could not find '{}' on revision " "'{}'".format(path, revision) + "Could not find '{}' on revision '{}'".format(path, revision) ) From 84fd893940ac14972048a3b35e9e548e81566340 Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 30 Apr 2020 19:05:48 +0200 Subject: [PATCH 095/102] plot: rename filter and result options to select and file --- dvc/command/plot.py | 24 ++++++++++++------------ tests/unit/command/test_plot.py | 8 ++++---- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 54886e2c8f..1849448971 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -14,8 +14,8 @@ def _revisions(self): raise NotImplementedError def _result_file(self): - if self.args.result: - return self.args.result + if self.args.file: + return self.args.file extension = self._result_extension() base = self._result_basename() @@ -38,11 +38,11 @@ def _result_extension(self): def run(self): fields = None jsonpath = None - if self.args.filter: - if self.args.filter.startswith("$"): - jsonpath = self.args.filter + if self.args.select: + if self.args.select.startswith("$"): + jsonpath = self.args.select else: - fields = set(self.args.filter.split(",")) + fields = set(self.args.select.split(",")) try: plot_string = self.repo.plot( datafile=self.args.datafile, @@ -133,11 +133,11 @@ def add_parser(subparsers, parent_parser): help="Continuous metrics file to visualize.", ) plot_show_parser.add_argument( - "-r", "--result", help="Name of the generated file." + "-f", "--file", help="Name of the generated file." ) plot_show_parser.add_argument( - "-f", - "--filter", + "-s", + "--select", default=None, help="Choose which field(s) or JSONPath to include in the plot.", ) @@ -194,7 +194,7 @@ def add_parser(subparsers, parent_parser): help="Continuous metrics file to visualize.", ) plot_diff_parser.add_argument( - "-r", "--result", help="Name of the generated file." + "-f", "--file", help="Name of the generated file." ) plot_diff_parser.add_argument( "revisions", @@ -203,8 +203,8 @@ def add_parser(subparsers, parent_parser): help="Git revisions to plot from", ) plot_diff_parser.add_argument( - "-f", - "--filter", + "-s", + "--select", default=None, help="Choose which field(s) or JSONPath to include in the plot.", ) diff --git a/tests/unit/command/test_plot.py b/tests/unit/command/test_plot.py index e43e6ac817..6804e114be 100644 --- a/tests/unit/command/test_plot.py +++ b/tests/unit/command/test_plot.py @@ -9,13 +9,13 @@ def test_metrics_diff(mocker): [ "plot", "diff", - "-r", + "--file", "result.extension", "-t", "template", "-d", "datafile", - "--filter", + "--select", "column1,column2", "--no-html", "--stdout", @@ -56,11 +56,11 @@ def test_metrics_show(mocker): [ "plot", "show", - "-r", + "-f", "result.extension", "-t", "template", - "-f", + "-s", "$.data", "--no-html", "--stdout", From b7f68616ac102789b07a7e005f1c562fa938a168 Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 30 Apr 2020 23:16:38 +0200 Subject: [PATCH 096/102] plot: add --title, --x-title, --y-title --- dvc/command/plot.py | 21 +++++- dvc/repo/plot/__init__.py | 54 +++++++-------- dvc/repo/plot/template.py | 119 +++++++++++++++++++++++--------- tests/func/test_plot.py | 14 +++- tests/unit/command/test_plot.py | 12 ++++ 5 files changed, 153 insertions(+), 67 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 1849448971..e7acaadfb8 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -54,6 +54,9 @@ def run(self): path=jsonpath, embed=not self.args.no_html, csv_header=not self.args.no_csv_header, + title=self.args.title, + x_title=self.args.x_title, + y_title=self.args.y_title, ) if self.args.stdout: @@ -133,7 +136,7 @@ def add_parser(subparsers, parent_parser): help="Continuous metrics file to visualize.", ) plot_show_parser.add_argument( - "-f", "--file", help="Name of the generated file." + "-f", "--file", default=None, help="Name of the generated file." ) plot_show_parser.add_argument( "-s", @@ -166,6 +169,13 @@ def add_parser(subparsers, parent_parser): default=False, help="Do not wrap Vega plot JSON with HTML.", ) + plot_show_parser.add_argument("--title", default=None, help="Plot title.") + plot_show_parser.add_argument( + "--x-title", default=None, help="X axis " "title." + ) + plot_show_parser.add_argument( + "--y-title", default=None, help="Y axis " "title." + ) plot_show_parser.set_defaults(func=CmdPlotShow) PLOT_DIFF_HELP = ( @@ -194,7 +204,7 @@ def add_parser(subparsers, parent_parser): help="Continuous metrics file to visualize.", ) plot_diff_parser.add_argument( - "-f", "--file", help="Name of the generated file." + "-f", "--file", default=None, help="Name of the generated file." ) plot_diff_parser.add_argument( "revisions", @@ -233,4 +243,11 @@ def add_parser(subparsers, parent_parser): default=False, help="Do not wrap Vega plot JSON with HTML.", ) + plot_diff_parser.add_argument("--title", default=None, help="Plot title.") + plot_diff_parser.add_argument( + "--x-title", default=None, help="X axis " "title." + ) + plot_diff_parser.add_argument( + "--y-title", default=None, help="Y axis " "title." + ) plot_diff_parser.set_defaults(func=CmdPlotDiff) diff --git a/dvc/repo/plot/__init__.py b/dvc/repo/plot/__init__.py index 65e3a25aac..a7868b1e2c 100644 --- a/dvc/repo/plot/__init__.py +++ b/dvc/repo/plot/__init__.py @@ -60,10 +60,11 @@ def fill_template( template_path, revisions, fields=None, - x_field=None, - y_field=None, path=None, csv_header=True, + x_field=None, + y_field=None, + **kwargs ): if x_field and fields: fields.add(x_field) @@ -93,11 +94,7 @@ def fill_template( ) if y_anchor and not y_field: - all_fields = list(first(rev_data_points).keys()) - all_fields.remove(PlotData.REVISION_FIELD) - if x_field and x_field in all_fields: - all_fields.remove(x_field) - y_field = last(all_fields) + y_field = _infer_y_field(rev_data_points, x_field) tmp_data.extend(rev_data_points) template_data[datafile] = tmp_data @@ -106,21 +103,26 @@ def fill_template( raise NoDataForTemplateError(template_path) return Template.fill( - template_path, template_data, datafile, x_field, y_field + template_path, + template_data, + priority_datafile=datafile, + x_field=x_field, + y_field=y_field, + **kwargs ) +def _infer_y_field(rev_data_points, x_field): + all_fields = list(first(rev_data_points).keys()) + all_fields.remove(PlotData.REVISION_FIELD) + if x_field and x_field in all_fields: + all_fields.remove(x_field) + y_field = last(all_fields) + return y_field + + def plot( - repo, - datafile=None, - template=None, - revisions=None, - fields=None, - x_field=None, - y_field=None, - path=None, - embed=False, - csv_header=True, + repo, datafile=None, template=None, revisions=None, embed=False, **kwargs ): if revisions is None: from dvc.repo.plot.data import WORKSPACE_REVISION_NAME @@ -133,15 +135,7 @@ def plot( template_path = _evaluate_templatepath(repo, template) plot_content = fill_template( - repo, - datafile, - template_path, - revisions, - fields, - x_field, - y_field, - path, - csv_header, + repo, datafile, template_path, revisions, **kwargs ) if embed: @@ -155,7 +149,7 @@ def _parse_template(template_path, priority_datafile): with open(template_path, "r") as fobj: tempalte_content = fobj.read() - template_datafiles = Template.parse_data_placeholders(tempalte_content) + template_datafiles = Template.parse_data_anchors(tempalte_content) if priority_datafile: if len(template_datafiles) > 1: raise TooManyDataSourcesError( @@ -165,6 +159,6 @@ def _parse_template(template_path, priority_datafile): return ( template_datafiles, - Template.X_AXIS_ANCHOR in tempalte_content, - Template.Y_AXIS_ANCHOR in tempalte_content, + Template.X_ANCHOR in tempalte_content, + Template.Y_ANCHOR in tempalte_content, ) diff --git a/dvc/repo/plot/template.py b/dvc/repo/plot/template.py index 67b77e2c4f..0685dc7d9f 100644 --- a/dvc/repo/plot/template.py +++ b/dvc/repo/plot/template.py @@ -29,9 +29,11 @@ class Template: SEPARATORS = (",", ": ") EXTENSION = ".json" METRIC_DATA_ANCHOR = "" - X_AXIS_ANCHOR = "" - Y_AXIS_ANCHOR = "" + X_ANCHOR = "" + Y_ANCHOR = "" TITLE_ANCHOR = "" + X_TITLE_ANCHOR = "" + Y_TITLE_ANCHOR = "" def __init__(self, templates_dir): self.plot_templates_dir = templates_dir @@ -69,22 +71,22 @@ def load_template(self, path): raise DvcException("Not in repo nor in defaults") @staticmethod - def get_data_placeholders(template_content): + def get_data_anchor(template_content): regex = re.compile('""]*>"') return regex.findall(template_content) @staticmethod - def parse_data_placeholders(template_content): + def parse_data_anchors(template_content): data_files = { Template.get_datafile(m) - for m in Template.get_data_placeholders(template_content) + for m in Template.get_data_anchor(template_content) } return {df for df in data_files if df} @staticmethod - def get_datafile(placeholder_string): + def get_datafile(anchor_string): return ( - placeholder_string.replace("<", "") + anchor_string.replace("<", "") .replace(">", "") .replace('"', "") .replace("DVC_METRIC_DATA", "") @@ -93,26 +95,71 @@ def get_datafile(placeholder_string): @staticmethod def fill( - template_path, data, priority_datafile=None, x_field=None, y_field=None + template_path, + data, + priority_datafile=None, + x_field=None, + y_field=None, + title=None, + x_title=None, + y_title=None, ): with open(template_path, "r") as fobj: result_content = fobj.read() - for placeholder in Template.get_data_placeholders(result_content): - file = Template.get_datafile(placeholder) + result_content = Template._replace_data_anchors( + result_content, data, priority_datafile + ) + + result_content = Template._replace_metadata_anchors( + result_content, title, x_field, x_title, y_field, y_title + ) + + return result_content + + @staticmethod + def _replace_metadata_anchors( + result_content, title, x_field, x_title, y_field, y_title + ): + if Template.TITLE_ANCHOR in result_content: + if title: + result_content = result_content.replace( + Template.TITLE_ANCHOR, title + ) + else: + result_content = result_content.replace( + Template.TITLE_ANCHOR, "" + ) + if Template.X_ANCHOR in result_content and x_field: + result_content = result_content.replace(Template.X_ANCHOR, x_field) + if Template.Y_ANCHOR in result_content and y_field: + result_content = result_content.replace(Template.Y_ANCHOR, y_field) + if Template.X_TITLE_ANCHOR in result_content: + if not x_title and x_field: + x_title = x_field + result_content = result_content.replace( + Template.X_TITLE_ANCHOR, x_title + ) + if Template.Y_TITLE_ANCHOR in result_content: + if not y_title and y_field: + y_title = y_field + result_content = result_content.replace( + Template.Y_TITLE_ANCHOR, y_title + ) + return result_content + + @staticmethod + def _replace_data_anchors(result_content, data, priority_datafile): + for anchor in Template.get_data_anchor(result_content): + file = Template.get_datafile(anchor) if not file or priority_datafile: key = priority_datafile else: key = file - if Template.TITLE_ANCHOR in result_content: - result_content = result_content.replace( - Template.TITLE_ANCHOR, key - ) - result_content = result_content.replace( - placeholder, + anchor, json.dumps( data[key], indent=Template.INDENT, @@ -120,16 +167,6 @@ def fill( sort_keys=True, ), ) - - if Template.X_AXIS_ANCHOR in result_content and x_field: - result_content = result_content.replace( - Template.X_AXIS_ANCHOR, x_field - ) - if Template.Y_AXIS_ANCHOR in result_content and y_field: - result_content = result_content.replace( - Template.Y_AXIS_ANCHOR, y_field - ) - return result_content @@ -142,8 +179,16 @@ class DefaultLinearTemplate(Template): "title": Template.TITLE_ANCHOR, "mark": {"type": "line"}, "encoding": { - "x": {"field": Template.X_AXIS_ANCHOR, "type": "quantitative"}, - "y": {"field": Template.Y_AXIS_ANCHOR, "type": "quantitative"}, + "x": { + "field": Template.X_ANCHOR, + "type": "quantitative", + "title": Template.X_TITLE_ANCHOR, + }, + "y": { + "field": Template.Y_ANCHOR, + "type": "quantitative", + "title": Template.Y_TITLE_ANCHOR, + }, "color": {"field": "rev", "type": "nominal"}, }, } @@ -158,14 +203,16 @@ class DefaultConfusionTemplate(Template): "mark": "rect", "encoding": { "x": { - "field": Template.X_AXIS_ANCHOR, + "field": Template.X_ANCHOR, "type": "nominal", "sort": "ascending", + "title": Template.X_TITLE_ANCHOR, }, "y": { - "field": Template.Y_AXIS_ANCHOR, + "field": Template.Y_ANCHOR, "type": "nominal", "sort": "ascending", + "title": Template.Y_TITLE_ANCHOR, }, "color": {"aggregate": "count", "type": "quantitative"}, "facet": {"field": "rev", "type": "nominal"}, @@ -181,8 +228,16 @@ class DefaultScatterTemplate(Template): "title": Template.TITLE_ANCHOR, "mark": "point", "encoding": { - "x": {"field": Template.X_AXIS_ANCHOR, "type": "quantitative"}, - "y": {"field": Template.Y_AXIS_ANCHOR, "type": "quantitative"}, + "x": { + "field": Template.X_ANCHOR, + "type": "quantitative", + "title": Template.X_TITLE_ANCHOR, + }, + "y": { + "field": Template.Y_ANCHOR, + "type": "quantitative", + "title": Template.Y_TITLE_ANCHOR, + }, "color": {"field": "rev", "type": "nominal"}, }, } diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 9458bac0e1..012a4fd153 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -55,20 +55,29 @@ def _write_json(tmp_dir, metric, filename): def test_plot_csv_one_column(tmp_dir, scm, dvc): - # for single column write with no header, hence `value` in result + # no header metric = [{"val": 2}, {"val": 3}] _write_csv(metric, "metric.csv", header=False) _run_with_metric(tmp_dir, metric_filename="metric.csv") - plot_string = dvc.plot("metric.csv", csv_header=False) + plot_string = dvc.plot( + "metric.csv", + csv_header=False, + x_title="x_title", + y_title="y_title", + title="mytitle", + ) plot_content = json.loads(plot_string) + assert plot_content["title"] == "mytitle" assert plot_content["data"]["values"] == [ {"0": "2", PlotData.INDEX_FIELD: 0, "rev": "workspace"}, {"0": "3", PlotData.INDEX_FIELD: 1, "rev": "workspace"}, ] assert plot_content["encoding"]["x"]["field"] == PlotData.INDEX_FIELD assert plot_content["encoding"]["y"]["field"] == "0" + assert plot_content["encoding"]["x"]["title"] == "x_title" + assert plot_content["encoding"]["y"]["title"] == "y_title" def test_plot_csv_multiple_columns(tmp_dir, scm, dvc): @@ -200,7 +209,6 @@ def test_plot_confusion(tmp_dir, dvc): ] assert plot_content["encoding"]["x"]["field"] == "predicted" assert plot_content["encoding"]["y"]["field"] == "actual" - assert plot_content["title"] == "metric.json" def test_plot_multiple_revs_default(tmp_dir, scm, dvc): diff --git a/tests/unit/command/test_plot.py b/tests/unit/command/test_plot.py index 6804e114be..4a5665dfc9 100644 --- a/tests/unit/command/test_plot.py +++ b/tests/unit/command/test_plot.py @@ -23,6 +23,12 @@ def test_metrics_diff(mocker): "x_field", "-y", "y_field", + "--title", + "my_title", + "--x-title", + "x_title", + "--y-title", + "y_title", "HEAD", "tag1", "tag2", @@ -48,6 +54,9 @@ def test_metrics_diff(mocker): x_field="x_field", y_field="y_field", csv_header=True, + title="my_title", + x_title="x_title", + y_title="y_title", ) @@ -88,6 +97,9 @@ def test_metrics_show(mocker): x_field=None, y_field=None, csv_header=False, + title=None, + x_title=None, + y_title=None, ) From 5f2793c50e2b970f33750b3eaf43e33176684f2e Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 30 Apr 2020 23:30:01 +0200 Subject: [PATCH 097/102] plot: xlab ylab --- dvc/command/plot.py | 20 ++++++-------------- tests/unit/command/test_plot.py | 4 ++-- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index e7acaadfb8..d1b980c288 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -55,8 +55,8 @@ def run(self): embed=not self.args.no_html, csv_header=not self.args.no_csv_header, title=self.args.title, - x_title=self.args.x_title, - y_title=self.args.y_title, + x_title=self.args.xlab, + y_title=self.args.ylab, ) if self.args.stdout: @@ -170,12 +170,8 @@ def add_parser(subparsers, parent_parser): help="Do not wrap Vega plot JSON with HTML.", ) plot_show_parser.add_argument("--title", default=None, help="Plot title.") - plot_show_parser.add_argument( - "--x-title", default=None, help="X axis " "title." - ) - plot_show_parser.add_argument( - "--y-title", default=None, help="Y axis " "title." - ) + plot_show_parser.add_argument("--xlab", default=None, help="X axis title.") + plot_show_parser.add_argument("--ylab", default=None, help="Y axis title.") plot_show_parser.set_defaults(func=CmdPlotShow) PLOT_DIFF_HELP = ( @@ -244,10 +240,6 @@ def add_parser(subparsers, parent_parser): help="Do not wrap Vega plot JSON with HTML.", ) plot_diff_parser.add_argument("--title", default=None, help="Plot title.") - plot_diff_parser.add_argument( - "--x-title", default=None, help="X axis " "title." - ) - plot_diff_parser.add_argument( - "--y-title", default=None, help="Y axis " "title." - ) + plot_diff_parser.add_argument("--xlab", default=None, help="X axis title.") + plot_diff_parser.add_argument("--ylab", default=None, help="Y axis title.") plot_diff_parser.set_defaults(func=CmdPlotDiff) diff --git a/tests/unit/command/test_plot.py b/tests/unit/command/test_plot.py index 4a5665dfc9..c0a3b00bc9 100644 --- a/tests/unit/command/test_plot.py +++ b/tests/unit/command/test_plot.py @@ -25,9 +25,9 @@ def test_metrics_diff(mocker): "y_field", "--title", "my_title", - "--x-title", + "--xlab", "x_title", - "--y-title", + "--ylab", "y_title", "HEAD", "tag1", From dad99eed1fa9fcd5deb992f2e3086f239524eb30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Fri, 1 May 2020 11:38:03 +0200 Subject: [PATCH 098/102] Update dvc/repo/plot/template.py Co-authored-by: Ruslan Kuprieiev --- dvc/repo/plot/template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/repo/plot/template.py b/dvc/repo/plot/template.py index 0685dc7d9f..595dfb4421 100644 --- a/dvc/repo/plot/template.py +++ b/dvc/repo/plot/template.py @@ -12,7 +12,7 @@ logger = logging.getLogger(__name__) -class TemplateNotFound(DvcException): +class TemplateNotFoundError(DvcException): def __init__(self, path): super().__init__("Template '{}' not found.".format(path)) From 218e2f12f4807c1b0fb13ae5c3d50e15488db461 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Fri, 1 May 2020 12:06:21 +0200 Subject: [PATCH 099/102] Update dvc/repo/plot/template.py Co-authored-by: Ruslan Kuprieiev --- dvc/repo/plot/template.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/dvc/repo/plot/template.py b/dvc/repo/plot/template.py index 595dfb4421..1c5004a534 100644 --- a/dvc/repo/plot/template.py +++ b/dvc/repo/plot/template.py @@ -41,9 +41,6 @@ def __init__(self, templates_dir): def dump(self): makedirs(self.plot_templates_dir, exist_ok=True) - if not os.path.exists(self.plot_templates_dir): - makedirs(self.plot_templates_dir) - with open( os.path.join( self.plot_templates_dir, self.TEMPLATE_NAME + self.EXTENSION From 0c6159d496c7d21823ca12e78e7cbf00393c35f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Fri, 1 May 2020 15:45:06 +0200 Subject: [PATCH 100/102] efiop review --- dvc/command/plot.py | 2 -- dvc/repo/plot/data.py | 21 ++++++++++++++++----- dvc/repo/plot/template.py | 17 ++--------------- tests/func/test_plot.py | 21 +++++++++++++++++++-- 4 files changed, 37 insertions(+), 24 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index d1b980c288..23c18b0b8c 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -151,7 +151,6 @@ def add_parser(subparsers, parent_parser): "-y", default=None, help="Field name for y axis." ) plot_show_parser.add_argument( - "-o", "--stdout", action="store_true", default=False, @@ -221,7 +220,6 @@ def add_parser(subparsers, parent_parser): "-y", default=None, help="Field name for y axis." ) plot_diff_parser.add_argument( - "-o", "--stdout", action="store_true", default=False, diff --git a/dvc/repo/plot/data.py b/dvc/repo/plot/data.py index 5eafc91b7a..b1be860685 100644 --- a/dvc/repo/plot/data.py +++ b/dvc/repo/plot/data.py @@ -6,8 +6,9 @@ from collections import OrderedDict from copy import copy +import yaml from funcy import first -from ruamel import yaml +from yaml import SafeLoader from dvc.exceptions import DvcException, PathMissingError @@ -64,7 +65,7 @@ def plot_data(filename, revision, content): elif extension == ".tsv": return CSVPlotData(filename, revision, content, delimiter="\t") elif extension == ".yaml": - return YAMLPLotData(filename, revision, content) + return YAMLPlotData(filename, revision, content) raise PlotMetricTypeError(filename) @@ -226,10 +227,20 @@ def raw(self, csv_header=True, **kwargs): ] -class YAMLPLotData(PlotData): +class YAMLPlotData(PlotData): def raw(self, **kwargs): - # TODO ordered - return yaml.parse(io.StringIO(self.content)) + class OrderedLoader(SafeLoader): + pass + + def construct_mapping(loader, node): + loader.flatten_mapping(node) + return OrderedDict(loader.construct_pairs(node)) + + OrderedLoader.add_constructor( + yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, construct_mapping + ) + + return yaml.load(self.content, OrderedLoader) def _load_from_revision(repo, datafile, revision): diff --git a/dvc/repo/plot/template.py b/dvc/repo/plot/template.py index 1c5004a534..2850a6220f 100644 --- a/dvc/repo/plot/template.py +++ b/dvc/repo/plot/template.py @@ -54,19 +54,6 @@ def dump(self): separators=self.SEPARATORS, ) - def load_template(self, path): - try: - with open(path, "r") as fd: - return json.load(fd) - except FileNotFoundError: - try: - with open( - os.path.join(self.plot_templates_dir, path), "r" - ) as fd: - return json.load(fd) - except FileNotFoundError: - raise DvcException("Not in repo nor in defaults") - @staticmethod def get_data_anchor(template_content): regex = re.compile('""]*>"') @@ -256,7 +243,7 @@ def templates_dir(self): def default_template(self): default_plot_path = os.path.join(self.templates_dir, "default.json") if not os.path.exists(default_plot_path): - raise TemplateNotFound(os.path.relpath(default_plot_path)) + raise TemplateNotFoundError(os.path.relpath(default_plot_path)) return default_plot_path def get_template(self, path): @@ -278,7 +265,7 @@ def get_template(self, path): assert len(matches) == 1 return matches[0] - raise TemplateNotFound(path) + raise TemplateNotFoundError(path) def __init__(self, dvc_dir): self.dvc_dir = dvc_dir diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 012a4fd153..6be11320f8 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -5,6 +5,7 @@ from collections import OrderedDict import pytest +import yaml from bs4 import BeautifulSoup from funcy import first @@ -15,7 +16,7 @@ PlotData, ) from dvc.repo.plot.template import ( - TemplateNotFound, + TemplateNotFoundError, NoDataForTemplateError, ) from dvc.repo.plot import NoDataOrTemplateProvided @@ -413,7 +414,7 @@ def test_should_raise_on_no_template_and_datafile(tmp_dir, dvc): def test_should_raise_on_no_template(tmp_dir, dvc): - with pytest.raises(TemplateNotFound): + with pytest.raises(TemplateNotFoundError): dvc.plot("metric.json", "non_existing_template.json") @@ -485,3 +486,19 @@ def test_plot_embed(tmp_dir, scm, dvc): assert _remove_whitespace(data_dump) in _remove_whitespace( first(page_content.body.script.contents) ) + + +def test_plot_yaml(tmp_dir, scm, dvc): + metric = [{"val": 2}, {"val": 3}] + with open("metric.yaml", "w") as fobj: + yaml.dump(metric, fobj) + + _run_with_metric(tmp_dir, metric_filename="metric.yaml") + + plot_string = dvc.plot("metric.yaml",) + + plot_content = json.loads(plot_string) + assert plot_content["data"]["values"] == [ + {"val": 2, PlotData.INDEX_FIELD: 0, "rev": "workspace"}, + {"val": 3, PlotData.INDEX_FIELD: 1, "rev": "workspace"}, + ] From eeef8ffee1ffe284a3340a57280d801762eea380 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Fri, 1 May 2020 17:46:13 +0200 Subject: [PATCH 101/102] plot: bash completion --- dvc/command/plot.py | 24 ++++++++++++------------ dvc/repo/plot/data.py | 2 +- scripts/completion/dvc.bash | 5 ++++- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 23c18b0b8c..b507e73063 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -129,12 +129,6 @@ def add_parser(subparsers, parent_parser): default=None, help="File to be injected with data.", ) - plot_show_parser.add_argument( - "datafile", - nargs="?", - default=None, - help="Continuous metrics file to visualize.", - ) plot_show_parser.add_argument( "-f", "--file", default=None, help="Name of the generated file." ) @@ -171,6 +165,12 @@ def add_parser(subparsers, parent_parser): plot_show_parser.add_argument("--title", default=None, help="Plot title.") plot_show_parser.add_argument("--xlab", default=None, help="X axis title.") plot_show_parser.add_argument("--ylab", default=None, help="Y axis title.") + plot_show_parser.add_argument( + "datafile", + nargs="?", + default=None, + help="Continuous metrics file to visualize.", + ) plot_show_parser.set_defaults(func=CmdPlotShow) PLOT_DIFF_HELP = ( @@ -201,12 +201,6 @@ def add_parser(subparsers, parent_parser): plot_diff_parser.add_argument( "-f", "--file", default=None, help="Name of the generated file." ) - plot_diff_parser.add_argument( - "revisions", - nargs="*", - default=None, - help="Git revisions to plot from", - ) plot_diff_parser.add_argument( "-s", "--select", @@ -240,4 +234,10 @@ def add_parser(subparsers, parent_parser): plot_diff_parser.add_argument("--title", default=None, help="Plot title.") plot_diff_parser.add_argument("--xlab", default=None, help="X axis title.") plot_diff_parser.add_argument("--ylab", default=None, help="Y axis title.") + plot_diff_parser.add_argument( + "revisions", + nargs="*", + default=None, + help="Git revisions to plot from", + ) plot_diff_parser.set_defaults(func=CmdPlotDiff) diff --git a/dvc/repo/plot/data.py b/dvc/repo/plot/data.py index b1be860685..eafdf8e077 100644 --- a/dvc/repo/plot/data.py +++ b/dvc/repo/plot/data.py @@ -277,7 +277,7 @@ def _load_from_revisions(repo, datafile, revisions): exceptions.append(e) except PlotMetricTypeError: raise - except Exception: + except (yaml.error.YAMLError, json.decoder.JSONDecodeError, csv.Error): logger.error("Failed to parse '{}' at '{}'.".format(datafile, rev)) raise diff --git a/scripts/completion/dvc.bash b/scripts/completion/dvc.bash index cde8130e62..78f966acde 100644 --- a/scripts/completion/dvc.bash +++ b/scripts/completion/dvc.bash @@ -5,7 +5,7 @@ # - https://stackoverflow.com/questions/12933362 _dvc_commands='add cache checkout commit config destroy diff fetch get-url get gc \ - import-url import init install lock list metrics move pipeline pull push \ + import-url import init install lock list metrics move pipeline plot pull push \ remote remove repro root run status unlock unprotect update version' _dvc_options='-h --help -V --version' @@ -51,6 +51,9 @@ _dvc_pipeline='list show' _dvc_pipeline_list='' _dvc_pipeline_show='-c --commands -o --outs --ascii --dot --tree -l --locked' _dvc_pipeline_show_COMPGEN=_dvc_compgen_DVCFiles +_dvc_plot='show diff' +_dvc_plot_show='-t --template -f --file -s --select -x -y --stdout --no-csv-header --no-html --title --xlab --ylab' +_dvc_plot_diff='-t --template -d --datafile -f --file -s --select -x -y --stdout --no-csv-header --no-html --title --xlab --ylab' _dvc_pull='-j --jobs -r --remote -a --all-branches -T --all-tags -f --force -d --with-deps -R --recursive' _dvc_pull_COMPGEN=_dvc_compgen_DVCFiles _dvc_push='-j --jobs -r --remote -a --all-branches -T --all-tags -d --with-deps -R --recursive' From 333882dbe0c8d6daf6e05abdd8ca8e6edabcfd94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Fri, 1 May 2020 18:23:21 +0200 Subject: [PATCH 102/102] plot: static code analysis fixes --- dvc/repo/plot/__init__.py | 6 +++--- dvc/repo/plot/data.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dvc/repo/plot/__init__.py b/dvc/repo/plot/__init__.py index a7868b1e2c..92a1376e31 100644 --- a/dvc/repo/plot/__init__.py +++ b/dvc/repo/plot/__init__.py @@ -80,10 +80,10 @@ def fill_template( x_field = PlotData.INDEX_FIELD template_data = {} - for datafile in template_datafiles: + for template_datafile in template_datafiles: from dvc.repo.plot.data import _load_from_revisions - plot_datas = _load_from_revisions(repo, datafile, revisions) + plot_datas = _load_from_revisions(repo, template_datafile, revisions) tmp_data = [] for pd in plot_datas: rev_data_points = pd.to_datapoints( @@ -97,7 +97,7 @@ def fill_template( y_field = _infer_y_field(rev_data_points, x_field) tmp_data.extend(rev_data_points) - template_data[datafile] = tmp_data + template_data[template_datafile] = tmp_data if len(template_data) == 0: raise NoDataForTemplateError(template_path) diff --git a/dvc/repo/plot/data.py b/dvc/repo/plot/data.py index eafdf8e077..31b53b2692 100644 --- a/dvc/repo/plot/data.py +++ b/dvc/repo/plot/data.py @@ -122,7 +122,7 @@ def _apply_path(data, path=None, **kwargs): def _lists(dictionary): - for key, value in dictionary.items(): + for _, value in dictionary.items(): if isinstance(value, dict): yield from (_lists(value)) elif isinstance(value, list): @@ -138,7 +138,7 @@ def _find_data(data, fields=None, **kwargs): fields = set() for l in _lists(data): - if all([isinstance(dp, dict) for dp in l]): + if all(isinstance(dp, dict) for dp in l): if set(first(l).keys()) & fields == fields: return l raise PlotDataStructureError()