Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
185 changes: 182 additions & 3 deletions dvc/repo/plots/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,80 @@
import logging

from funcy import first, project

from dvc.exceptions import DvcException, NoPlotsError, OutputNotFoundError
from dvc.repo.tree import RepoTree
from dvc.schema import PLOT_PROPS

logger = logging.getLogger(__name__)


class Plots:
def __init__(self, repo):
self.repo = repo

def show(self, *args, **kwargs):
from .show import show
def collect(self, targets=None, revs=None):
"""Collects all props and data for plots.

Returns a structure like:
{rev: {plots.csv: {
props: {x: ..., "csv_header": ..., ...},
data: "...data as a string...",
}}}
Data parsing is postponed, since it's affected by props.
"""
targets = [targets] if isinstance(targets, str) else targets or []
data = {}
for rev in self.repo.brancher(revs=revs):
# .brancher() adds unwanted workspace
if revs is not None and rev not in revs:
continue
rev = rev or "workspace"

tree = RepoTree(self.repo)
plots = _collect_plots(self.repo, targets, rev)
for datafile, props in plots.items():
data[rev] = {datafile: {"props": props}}

# Load data from git or dvc cache
try:
with tree.open(datafile) as fd:
data[rev][datafile]["data"] = fd.read()
except FileNotFoundError:
# This might happen simply because cache is absent
pass

return data

def render(self, data, revs=None, props=None, templates=None):
"""Renders plots"""
props = props or {}
templates = templates or self.repo.plot_templates

return show(self.repo, *args, **kwargs)
# Merge data by plot file and apply overriding props
plots = _prepare_plots(data, revs, props)

return {
datafile: _render(datafile, desc["data"], desc["props"], templates)
for datafile, desc in plots.items()
}

def show(self, targets=None, revs=None, props=None):
from .data import NoMetricInHistoryError

data = self.collect(targets, revs)

# If any mentioned plot doesn't have any data then that's an error
targets = [targets] if isinstance(targets, str) else targets or []
for target in targets:
if not any("data" in d[target] for d in data.values()):
raise NoMetricInHistoryError(target)

# No data at all is a special error with a special message
if not data:
raise NoPlotsError()

return self.render(data, revs, props)

def diff(self, *args, **kwargs):
from .diff import diff
Expand Down Expand Up @@ -33,3 +102,113 @@ def modify(self, path, props=None, unset=None):

dvcfile = Dvcfile(self.repo, out.stage.path)
dvcfile.dump(out.stage, update_pipeline=True)
Comment thread
Suor marked this conversation as resolved.
Outdated


def _collect_plots(repo, targets=None, rev=None):
def _targets_to_outs(targets):
for t in targets:
try:
(out,) = repo.find_outs_by_path(t)
yield out
except OutputNotFoundError:
logger.warning(
"File '{}' was not found at: '{}'. It will not be "
"plotted.".format(t, rev)
)

if targets:
outs = _targets_to_outs(targets)
else:
outs = (out for stage in repo.stages for out in stage.outs if out.plot)

return {str(out): _plot_props(out) for out in outs}


def _plot_props(out):
if not out.plot:
raise DvcException(
f"'{out}' is not a plot. Use `dvc plots modify` to change that."
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

plots modify can change a file that is not a plot into a plot? Or maybe I don't understand this message.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it can. It's a feature.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK thanks. What I meant is that I didn't understand what the sentence meant. Thinking about it now I think I get it: a regular output is being given to a plots command right? And you can make it into a plot with plots modify. So 2 things:

  1. I recommend something like this for message for now: "'{out}' is found in stage '{stg}' as an output but not as a plot. You can make it into a plot with `plots modify ...`
  2. Actually, what's the command to make an output into a plot with plots modify? I'd like to document this feature.
  3. Why not just try to plot the output instead and throw a formatting error if it's not valid?

Thanks

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Too long, and not any useful information than the one provided already.
  2. dvc plots modify output-file-name-that-is-not-a-plot.file_ext
    You can provide any other flags on the same command, like --template too.

Copy link
Copy Markdown
Contributor

@jorgeorpinel jorgeorpinel Jun 12, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The existing string is hard to read and understand. Can we come up with a better suggestion if mine isn't liked? "to change that" to change what?

Copy link
Copy Markdown
Contributor

@jorgeorpinel jorgeorpinel Jun 12, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

E.g. The given output file is not a known plot. Use `dvc plots modify {out}` to turn it into one.

p.s. why is it bad that a message to the user is relatively long? (2 sentences)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, can plots modify make ANY file into a plot, or only other outputs? What about existing metrics?
Does it validate plot file format?
Thanks

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

p.s. why is it bad that a message to the user is relatively long? (2 sentences)

The longer the message the smaller percentage of people will read it.

Also, can plots modify make ANY file into a plot, or only other outputs?

Only outputs. In current dvc-file design only outputs may become metrics or plots. Metric might become a plot. Not sure whether it will stop being a metric.

Does it validate plot file format?

No. dvc run doesn't validate it either. Strictly speaking we don't have such thing as validation here, we can check whether loading it results in an error though.

Copy link
Copy Markdown
Contributor

@jorgeorpinel jorgeorpinel Jun 18, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The longer the message the smaller percentage of people will read it.

Arguable. But if the text is hard to understand, it doesn't matter how many people read it. I guess I'll just sent a PR with a suggested text instead of arguing here.

Thanks for the other info. TBH it seems a little obscure/ arbitrary behavior but I'm not opposed to it. I just think it would be best to re-think it. Not really my realm so I'll just ping @shcheklein and @efiop here 🙂

dvc run doesn't validate it... we can check whether loading it results in an error

Please note that coincidentally, I opened an issue about this (not related to this conversation originally): #4068

)
if isinstance(out.plot, list):
raise DvcException("Multiple plots per data file not supported.")
Comment thread
jorgeorpinel marked this conversation as resolved.
if isinstance(out.plot, bool):
return {}

return project(out.plot, PLOT_PROPS)


def _prepare_plots(data, revs, props):
"""Groups data by plot file.

Also resolves props conflicts between revs and applies global props.
"""
# we go in order revs are supplied on props conflict first ones win.
revs = iter(data) if revs is None else revs

plots, props_revs = {}, {}
for rev in revs:
# Asked for revision without data
if rev not in data:
continue

for datafile, desc in data[rev].items():
# props from command line overwrite plot props from out definition
full_props = {**desc["props"], **props}

if datafile in plots:
saved = plots[datafile]
if saved["props"] != full_props:
logger.warning(
f"Inconsistent plot props for '{datafile}' in "
f"'{props_revs[datafile]}' and '{rev}'. "
f"Going to use ones from '{props_revs[datafile]}'"
)

saved["data"][rev] = desc["data"]
else:
plots[datafile] = {
"props": full_props,
"data": {rev: desc["data"]},
}
# Save rev we got props from
props_revs[datafile] = rev

return plots


def _render(datafile, datas, props, templates):
from .data import plot_data, PlotData

# Copy it to not modify a passed value
props = props.copy()

# Add x and y to fields if set
fields = props.get("fields")
if fields is not None:
fields = {*fields, props.get("x"), props.get("y")} - {None}

template = templates.load(props.get("template") or "default")

# If x is not set add index field
if not props.get("x") and template.has_anchor("x"):
props["append_index"] = True
props["x"] = PlotData.INDEX_FIELD

# Parse all data, preprocess it and collect as a list of dicts
data = []
for rev, datablob in datas.items():
rev_data = plot_data(datafile, rev, datablob).to_datapoints(
fields=fields,
path=props.get("path"),
csv_header=props.get("csv_header", True),
append_index=props.get("append_index", False),
)
data.extend(rev_data)

# If y is not set then use last field not used yet
if not props.get("y") and template.has_anchor("y"):
fields = list(first(data))
skip = (PlotData.REVISION_FIELD, props.get("x"))
props["y"] = first(f for f in reversed(fields) if f not in skip)

return template.render(data, props=props)
66 changes: 2 additions & 64 deletions dvc/repo/plots/data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import csv
import io
import json
import logging
import os
from collections import OrderedDict
from copy import copy
Expand All @@ -10,9 +9,7 @@
from funcy import first
from yaml import SafeLoader

from dvc.exceptions import DvcException, PathMissingError

logger = logging.getLogger(__name__)
from dvc.exceptions import DvcException


class PlotMetricTypeError(DvcException):
Expand All @@ -31,21 +28,6 @@ def __init__(self):
)


class JsonParsingError(DvcException):
def __init__(self, file):
super().__init__(
"Failed to infer data structure from '{}'. Did you forget "
"to specify JSONpath?".format(file)
)


class NoMetricOnRevisionError(DvcException):
def __init__(self, path, revision):
self.path = path
self.revision = revision
super().__init__(f"Could not find '{path}' on revision '{revision}'")


class NoMetricInHistoryError(DvcException):
def __init__(self, path):
super().__init__(f"Could not find '{path}'.")
Expand Down Expand Up @@ -77,7 +59,7 @@ def _filter_fields(data_points, filename, revision, fields=None, **kwargs):
if keys & fields != fields:
raise DvcException(
"Could not find fields: '{}' for '{}' at '{}'.".format(
", " "".join(fields), filename, revision
", ".join(fields), filename, revision
)
)

Expand Down Expand Up @@ -240,47 +222,3 @@ def construct_mapping(loader, node):
def _processors(self):
parent_processors = super()._processors()
return [_find_data] + parent_processors


def _load_from_revision(repo, datafile, revision):
from dvc.repo.tree import RepoTree

tree = RepoTree(repo)

try:
with tree.open(datafile) as fobj:
datafile_content = fobj.read()

except (FileNotFoundError, PathMissingError):
raise NoMetricOnRevisionError(datafile, revision)

return plot_data(datafile, revision, datafile_content)


def _load_from_revisions(repo, datafile, revisions):
data = []
exceptions = []

for rev in repo.brancher(revs=revisions):
if rev == "workspace" and rev not in revisions:
continue

try:
data.append(_load_from_revision(repo, datafile, rev))
except NoMetricOnRevisionError as e:
exceptions.append(e)
except PlotMetricTypeError:
raise
except (yaml.error.YAMLError, json.decoder.JSONDecodeError, csv.Error):
logger.error(f"Failed to parse '{datafile}' at '{rev}'.")
raise

if not data and exceptions:
raise NoMetricInHistoryError(datafile)
else:
for e in exceptions:
logger.warning(
"File '{}' was not found at: '{}'. It will not be "
"plotted.".format(e.path, e.revision)
)
return data
Loading