-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Refactor plots #3994
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Refactor plots #3994
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,11 +1,80 @@ | ||
| import logging | ||
|
|
||
| from funcy import first, project | ||
|
|
||
| from dvc.exceptions import DvcException, NoPlotsError, OutputNotFoundError | ||
| from dvc.repo.tree import RepoTree | ||
| from dvc.schema import PLOT_PROPS | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| class Plots: | ||
def __init__(self, repo):
    """Binds this plots helper to a repo.

    Args:
        repo: the repo object the other methods read through `self.repo`.
    """
    self.repo = repo
|
|
||
| def show(self, *args, **kwargs): | ||
| from .show import show | ||
def collect(self, targets=None, revs=None):
    """Collects all props and data for plots.

    Returns a structure like:
        {rev: {plots.csv: {
            props: {x: ..., "csv_header": ..., ...},
            data: "...data as a string...",
        }}}
    Data parsing is postponed, since it's affected by props.

    Args:
        targets: a single path or a list of paths; a falsy value means
            "every output marked as a plot".
        revs: revisions handed to `repo.brancher()`; when given, revisions
            yielded by the brancher that are not in this list are skipped.

    Returns:
        dict keyed by revision, then by plot file. The "data" key is absent
        for files that could not be opened.
    """
    targets = [targets] if isinstance(targets, str) else targets or []
    data = {}
    for rev in self.repo.brancher(revs=revs):
        # .brancher() adds unwanted workspace
        if revs is not None and rev not in revs:
            continue
        rev = rev or "workspace"

        tree = RepoTree(self.repo)
        plots = _collect_plots(self.repo, targets, rev)
        for datafile, props in plots.items():
            # Add to the per-revision dict instead of rebinding it, otherwise
            # every plot file but the last one of this revision is lost.
            entry = {"props": props}
            data.setdefault(rev, {})[datafile] = entry

            # Load data from git or dvc cache
            try:
                with tree.open(datafile) as fd:
                    entry["data"] = fd.read()
            except FileNotFoundError:
                # This might happen simply because cache is absent
                pass

    return data
|
|
||
def render(self, data, revs=None, props=None, templates=None):
    """Renders plots.

    Args:
        data: collected structure, {rev: {datafile: {"props": ..., "data": ...}}}.
        revs: revisions in priority order, forwarded to `_prepare_plots`.
        props: props overriding the ones stored per plot; defaults to {}.
        templates: template collection; defaults to `self.repo.plot_templates`.

    Returns:
        dict mapping each plot file to the result of `_render` for it.
    """
    props = props or {}
    templates = templates or self.repo.plot_templates

    # NOTE: the stray `return show(self.repo, *args, **kwargs)` that sat here
    # belonged to the removed implementation of `show`; it referenced
    # undefined names and made everything below unreachable.

    # Merge data by plot file and apply overriding props
    plots = _prepare_plots(data, revs, props)

    return {
        datafile: _render(datafile, desc["data"], desc["props"], templates)
        for datafile, desc in plots.items()
    }
|
|
||
def show(self, targets=None, revs=None, props=None):
    """Collects plot data and renders it.

    Args:
        targets: a single path or a list of paths to plot; falsy means all.
        revs: revisions to collect and render.
        props: props overriding the ones stored per plot.

    Returns:
        whatever `self.render()` returns for the collected data.

    Raises:
        NoMetricInHistoryError: a requested target has no data in any of
            the collected revisions.
        NoPlotsError: nothing was collected at all.
    """
    from .data import NoMetricInHistoryError

    data = self.collect(targets, revs)

    # If any mentioned plot doesn't have any data then that's an error
    targets = [targets] if isinstance(targets, str) else targets or []
    for target in targets:
        # A target may be missing from some revisions entirely, so look it
        # up with .get() instead of indexing (which raised KeyError).
        if not any("data" in d.get(target, {}) for d in data.values()):
            raise NoMetricInHistoryError(target)

    # No data at all is a special error with a special message
    if not data:
        raise NoPlotsError()

    return self.render(data, revs, props)
|
|
||
| def diff(self, *args, **kwargs): | ||
| from .diff import diff | ||
|
|
@@ -33,3 +102,113 @@ def modify(self, path, props=None, unset=None): | |
|
|
||
| dvcfile = Dvcfile(self.repo, out.stage.path) | ||
| dvcfile.dump(out.stage, update_pipeline=True) | ||
|
|
||
|
|
||
def _collect_plots(repo, targets=None, rev=None):
    """Maps plot outputs to their plot props.

    Args:
        repo: repo object providing `find_outs_by_path()` and `stages`.
        targets: paths to look up; when falsy, every stage output with a
            truthy `plot` attribute is used instead.
        rev: revision name, used only in the warning message.

    Returns:
        dict of {str(out): props} with props produced by `_plot_props`.
    """

    def _resolve(paths):
        # Yield the single output matching each path; warn about and skip
        # paths that have no output.
        for path in paths:
            try:
                (found,) = repo.find_outs_by_path(path)
            except OutputNotFoundError:
                logger.warning(
                    "File '{}' was not found at: '{}'. It will not be "
                    "plotted.".format(path, rev)
                )
            else:
                yield found

    if not targets:
        candidates = (
            item for stage in repo.stages for item in stage.outs if item.plot
        )
    else:
        candidates = _resolve(targets)

    collected = {}
    for candidate in candidates:
        collected[str(candidate)] = _plot_props(candidate)
    return collected
|
|
||
|
|
||
| def _plot_props(out): | ||
| if not out.plot: | ||
| raise DvcException( | ||
| f"'{out}' is not a plot. Use `dvc plots modify` to change that." | ||
|
Contributor
`plots modify` can change a file that is not a plot into a plot? Or maybe I don't understand this message.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, it can. It's a feature.
Contributor
OK, thanks. What I meant is that I didn't understand what the sentence meant. Thinking about it now, I think I get it: a regular output is being given to a plots command, right? And you can make it into a plot with `plots modify`. So 2 things:
Thanks
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
The existing string is hard to read and understand. Can we come up with a better suggestion if mine isn't liked? "To change that" — to change what?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. E.g. p.s. why is it bad that a message to the user is relatively long? (2 sentences)
Contributor
Also, can `plots modify` make ANY file into a plot, or only other outputs? What about existing metrics?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The longer the message the smaller percentage of people will read it.
Only outputs. In current dvc-file design only outputs may become metrics or plots. Metric might become a plot. Not sure whether it will stop being a metric.
No.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Arguable. But if the text is hard to understand, it doesn't matter how many people read it. I guess I'll just send a PR with a suggested text instead of arguing here. Thanks for the other info. TBH it seems a little obscure/arbitrary behavior, but I'm not opposed to it. I just think it would be best to rethink it. Not really my realm, so I'll just ping @shcheklein and @efiop here 🙂
Please note that coincidentally, I opened an issue about this (not related to this conversation originally): #4068 |
||
| ) | ||
| if isinstance(out.plot, list): | ||
| raise DvcException("Multiple plots per data file not supported.") | ||
|
jorgeorpinel marked this conversation as resolved.
|
||
| if isinstance(out.plot, bool): | ||
| return {} | ||
|
|
||
| return project(out.plot, PLOT_PROPS) | ||
|
|
||
|
|
||
def _prepare_plots(data, revs, props):
    """Groups data by plot file.

    Also resolves props conflicts between revs and applies global props.

    Args:
        data: {rev: {datafile: {"props": {...}, "data": ...}}}. Entries
            without a "data" key are skipped.
        revs: revisions in priority order, or None to follow the order of
            `data`. Revisions missing from `data` are ignored.
        props: props that override the ones stored with each plot file.

    Returns:
        {datafile: {"props": merged props, "data": {rev: raw data}}}
    """
    # Revisions are processed in the order they were supplied; when props
    # conflict between revisions, the first revision seen wins.
    revs = iter(data) if revs is None else revs

    plots, props_revs = {}, {}
    for rev in revs:
        # Asked for revision without data
        if rev not in data:
            continue

        for datafile, desc in data[rev].items():
            # Files whose contents could not be loaded carry no "data" key;
            # skip them instead of failing with KeyError below.
            if "data" not in desc:
                continue

            # props from command line overwrite plot props from out definition
            full_props = {**desc["props"], **props}

            if datafile in plots:
                saved = plots[datafile]
                if saved["props"] != full_props:
                    logger.warning(
                        f"Inconsistent plot props for '{datafile}' in "
                        f"'{props_revs[datafile]}' and '{rev}'. "
                        f"Going to use ones from '{props_revs[datafile]}'"
                    )

                saved["data"][rev] = desc["data"]
            else:
                plots[datafile] = {
                    "props": full_props,
                    "data": {rev: desc["data"]},
                }
                # Save rev we got props from
                props_revs[datafile] = rev

    return plots
|
|
||
|
|
||
def _render(datafile, datas, props, templates):
    """Renders a single plot file from the raw data of its revisions.

    Args:
        datafile: path of the plot file, passed on to `plot_data`.
        datas: {rev: raw data} for this file.
        props: plot props; copied first, so the caller's dict is not mutated.
        templates: object whose `load(name)` returns the template to use.

    Returns:
        the result of `template.render(data, props=props)`.
    """
    from .data import plot_data, PlotData

    # Work on a copy so the passed value stays untouched
    props = props.copy()

    # A field filter must not drop the axes, so add x and y when they are set
    fields = props.get("fields")
    if fields is not None:
        axes = {props.get("x"), props.get("y")}
        fields = (set(fields) | axes) - {None}

    template_name = props.get("template") or "default"
    template = templates.load(template_name)

    # Without an explicit x fall back to the index field
    if not props.get("x") and template.has_anchor("x"):
        props["append_index"] = True
        props["x"] = PlotData.INDEX_FIELD

    # Parse every revision's blob and flatten the results into one list of
    # datapoints
    data = [
        point
        for rev, datablob in datas.items()
        for point in plot_data(datafile, rev, datablob).to_datapoints(
            fields=fields,
            path=props.get("path"),
            csv_header=props.get("csv_header", True),
            append_index=props.get("append_index", False),
        )
    ]

    # Without an explicit y pick the last field that is not already taken
    if not props.get("y") and template.has_anchor("y"):
        available = list(first(data))
        taken = (PlotData.REVISION_FIELD, props.get("x"))
        props["y"] = first(
            name for name in reversed(available) if name not in taken
        )

    return template.render(data, props=props)
Uh oh!
There was an error while loading. Please reload this page.