From 6acf73da71cf3f6ce8ca60278aac1f64ffb0edab Mon Sep 17 00:00:00 2001 From: pawel Date: Mon, 22 Jul 2019 11:53:38 +0200 Subject: [PATCH 1/2] dvcignore: update description --- src/Documentation/glossary.js | 7 ++ static/docs/user-guide/dvcignore.md | 150 +++++++++++++++++++++++----- 2 files changed, 133 insertions(+), 24 deletions(-) diff --git a/src/Documentation/glossary.js b/src/Documentation/glossary.js index dbc06e458a..0c6504bf22 100644 --- a/src/Documentation/glossary.js +++ b/src/Documentation/glossary.js @@ -37,6 +37,13 @@ export default { desc: 'Stage (DVC-file) created with the `dvc import` or `dvc import-url` ' + 'commands. They represent files or directories from external sources.' + }, + { + name: 'Output', + match: ['output', 'outputs'], + desc: + 'A file or a directory that is under DVC control. See `dvc add`,' + + ' `dvc run`, `dvc import`, `dvc import-url` commands.' } ] } diff --git a/static/docs/user-guide/dvcignore.md b/static/docs/user-guide/dvcignore.md index bf525362e7..9767303ae0 100644 --- a/static/docs/user-guide/dvcignore.md +++ b/static/docs/user-guide/dvcignore.md @@ -1,43 +1,145 @@ -# dvcignore File +# .dvcignore File Marks which files and/or directories should be ignored when traversing repository. -Sometimes you might want DVC to ignore files while traversing the project -directory. For example, when working on a project with many files in its data +Sometimes you might want DVC to ignore some files while working with the +project. For example, when working on a project with many files in its data directory, you might encounter extended execution time for operations that are -as simple as `dvc status`. To prevent this, we are implementing `.dvcignore` -files handling. When fully implemented, their implementation is intended to -provide similar functionality as `.gitignore` files provide for `git`. +as simple as `dvc status`. In other case you might want to omit files or folders +unrelated to the project (like `.DS_Store` on Mac). 
To address these +requirements we are implementing `.dvcignore` files handling. `.dvcignore` by +design works similar way as `.gitignore` does. ## How does it work? -- You need to create `.dvcignore` file; +- You need to create `.dvcignore` file. - Populate it with [patterns](https://git-scm.com/docs/gitignore) that you would - like to ignore; -- Each line should contain only one pattern; + like to ignore. +- Each line should contain only one pattern. - During execution of commands that traverse directories, DVC will ignore - matching paths; -- Not every operation supports `.dvcignore`. To see current limitations, read - following paragraph. + matching paths. + +## Remarks + +- Ignored files will not be saved in cache, they will be non-existent for DVC. + It's worth to remember that, especially when ignoring files inside DVC-handled + directories. **It is crucial to understand, that DVC might remove ignored + files upon `dvc run` or `dvc repro`. If they are not produced by a + [pipeline](/doc/get-started/pipeline) step, they can be deleted permanently.** +- Keep in mind, that when you add to .dvcignore entries that affect one of the + existing outputs, its status will change and DVC will behave as + if that affected files were deleted. +- If DVC stumbles upon `.dvcignore` file inside a dependency or an + output directory, it raises an error. Ignoring files inside such + directory should be handled from `.dvcignore` file from upper levels of + project tree. -## Current limitations +## Syntax -During development, we noticed that there are few potential uses cases that -might be tricky to handle (e.g. what to do when we are `dvc add`-ing directory -containing `.dvcignore` file). Therefore, we decided to enable this feature -gradually in different parts of the project. +The same as for [`.gitignore`](https://git-scm.com/docs/gitignore). -Currently `.dvcignore` files will be read and applied in any operation that -collects DVC-files (e.g. 
`checkout`, `metrics`, `status`, `run`, `repro`), so it -is advised to use it in cases described in the first paragraph, when amount of -files in tree of repository directory causes performance issues. +## Examples: Modification of ignored data -## Syntax +Let's see what happens when we modify an ignored file. -The same as for [`.gitignore`](https://git-scm.com/docs/gitignore). +```dvc +$ mkdir data +$ echo data1 >> data/data1 +$ echo data2 >> data/data2 +$ tree . + +. +└── data + ├── data1 + └── data2 +``` + +We created the `data` directory. Let's ignore part of `data` and add it under +DVC control. + +```dvc +$ echo data/data1 >> .dvcignore +$ cat .dvcignore + +data/data1 + +$ dvc add data +$ tree .dvc/cache + +.dvc/cache +├── 54 +│   └── 40cb5e4c57ab54af68127492334a23.dir +└── ed + └── c3d3797971f12c7f5e1d106dd5cee2 +``` + +As we can see, `data1` has been ignored. The cache contains only one file entry (for +`data2`) and one dir entry (`data`). + +Now, let's modify `data1` and see if it affects `dvc status`. + +```dvc +$ dvc status + +Pipelines are up to date. Nothing to reproduce. + +$ echo "123" >> data/data1 +$ dvc status + +Pipelines are up to date. Nothing to reproduce. +``` + +Same modification applied to not ignored file will make `dvc status` inform +about change. + +```dvc +$ echo "123" >> data/data2 +$ dvc status + +data.dvc: + changed outs: + modified: data +``` + +## Examples: Moving ignored data + +```dvc +$ mkdir data +$ echo data1 >> data/data1 +$ echo data2 >> data/data2 +$ tree . + +. +└── data + ├── data1 + └── data2 + +$ echo data/data1 >> .dvcignore +$ cat .dvcignore + +data/data1 + +$ dvc add data +``` + +If we move not ignored data, DVC will behave as if we modified data directory by +adding new file. + +```dvc +$ dvc status + +Pipelines are up to date. Nothing to reproduce. 
+ +$ mv data/data1 data/data3 +$ dvc status + +data.dvc: + changed outs: + modified: data +``` -## Example +## Examples: Ignore dvc controlled file Lets analyze an example project: From a6a6e6dbcbc9da7e053845c8277db96dc942ebe8 Mon Sep 17 00:00:00 2001 From: Ivan Shcheklein Date: Wed, 31 Jul 2019 09:29:56 -0700 Subject: [PATCH 2/2] Update dvcignore.md --- static/docs/user-guide/dvcignore.md | 35 ++++++++++++++++------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/static/docs/user-guide/dvcignore.md b/static/docs/user-guide/dvcignore.md index 9767303ae0..cae3fdb29a 100644 --- a/static/docs/user-guide/dvcignore.md +++ b/static/docs/user-guide/dvcignore.md @@ -13,7 +13,8 @@ design works similar way as `.gitignore` does. ## How does it work? -- You need to create `.dvcignore` file. +- You need to create the `.dvcignore` file. It can be placed in the root of the + project or inside any subdirectory (see also [remarks](#Remarks) below). - Populate it with [patterns](https://git-scm.com/docs/gitignore) that you would like to ignore. - Each line should contain only one pattern. @@ -22,18 +23,22 @@ design works similar way as `.gitignore` does. ## Remarks -- Ignored files will not be saved in cache, they will be non-existent for DVC. - It's worth to remember that, especially when ignoring files inside DVC-handled - directories. **It is crucial to understand, that DVC might remove ignored - files upon `dvc run` or `dvc repro`. If they are not produced by a - [pipeline](/doc/get-started/pipeline) step, they can be deleted permanently.** -- Keep in mind, that when you add to .dvcignore entries that affect one of the - existing outputs, its status will change and DVC will behave as - if that affected files were deleted. -- If DVC stumbles upon `.dvcignore` file inside a dependency or an - output directory, it raises an error. Ignoring files inside such - directory should be handled from `.dvcignore` file from upper levels of - project tree. 
+Ignored files will not be saved in the cache; they will be non-existent for DVC. +It's worth remembering that, especially when ignoring files inside DVC-handled +directories. + +**It is crucial to understand that DVC might remove ignored files upon `dvc +run` or `dvc repro`. If they are not produced by a +[pipeline](/doc/get-started/pipeline) step, they can be deleted permanently.** + +Keep in mind that when you add entries to `.dvcignore` that affect one of the +existing outputs, its status will change and DVC will behave as if +the affected files were deleted. + +If DVC stumbles upon a `.dvcignore` file inside a dependency or an +output directory, it raises an error. Ignoring files inside such a +directory should be handled from a `.dvcignore` file in the upper levels of the +project tree. ## Syntax @@ -91,7 +96,7 @@ Pipelines are up to date. Nothing to reproduce. ``` Same modification applied to not ignored file will make `dvc status` inform -about change. +about change: ```dvc $ echo "123" >> data/data2 @@ -124,7 +129,7 @@ $ dvc add data ``` If we move not ignored data, DVC will behave as if we modified data directory by -adding new file. +adding new file: ```dvc $ dvc status