diff --git a/src/Documentation/glossary.js b/src/Documentation/glossary.js index dbc06e458a..0c6504bf22 100644 --- a/src/Documentation/glossary.js +++ b/src/Documentation/glossary.js @@ -37,6 +37,13 @@ export default { desc: 'Stage (DVC-file) created with the `dvc import` or `dvc import-url` ' + 'commands. They represent files or directories from external sources.' + }, + { + name: 'Output', + match: ['output', 'outputs'], + desc: + 'A file or a directory that is under DVC control. See `dvc add`,' + + ' `dvc run`, `dvc import`, `dvc import-url` commands.' } ] } diff --git a/static/docs/user-guide/dvcignore.md b/static/docs/user-guide/dvcignore.md index bf525362e7..cae3fdb29a 100644 --- a/static/docs/user-guide/dvcignore.md +++ b/static/docs/user-guide/dvcignore.md @@ -1,43 +1,150 @@ -# dvcignore File +# .dvcignore File Marks which files and/or directories should be ignored when traversing repository. -Sometimes you might want DVC to ignore files while traversing the project -directory. For example, when working on a project with many files in its data +Sometimes you might want DVC to ignore some files while working with the +project. For example, when working on a project with many files in its data directory, you might encounter extended execution time for operations that are -as simple as `dvc status`. To prevent this, we are implementing `.dvcignore` -files handling. When fully implemented, their implementation is intended to -provide similar functionality as `.gitignore` files provide for `git`. +as simple as `dvc status`. In other cases, you might want to omit files or folders +unrelated to the project (like `.DS_Store` on Mac). To address these +requirements, we are implementing `.dvcignore` file handling. `.dvcignore` by +design works in a similar way to `.gitignore`. ## How does it work? -- You need to create `.dvcignore` file; +- You need to create the `.dvcignore` file.
It can be placed in the root of the + project or inside any subdirectory (see also [remarks](#Remarks) below). - Populate it with [patterns](https://git-scm.com/docs/gitignore) that you would - like to ignore; -- Each line should contain only one pattern; + like to ignore. +- Each line should contain only one pattern. - During execution of commands that traverse directories, DVC will ignore - matching paths; -- Not every operation supports `.dvcignore`. To see current limitations, read - following paragraph. + matching paths. -## Current limitations +## Remarks -During development, we noticed that there are few potential uses cases that -might be tricky to handle (e.g. what to do when we are `dvc add`-ing directory -containing `.dvcignore` file). Therefore, we decided to enable this feature -gradually in different parts of the project. +Ignored files will not be saved in the cache; they will be non-existent for DVC. +It's worth remembering that, especially when ignoring files inside DVC-handled +directories. -Currently `.dvcignore` files will be read and applied in any operation that -collects DVC-files (e.g. `checkout`, `metrics`, `status`, `run`, `repro`), so it -is advised to use it in cases described in the first paragraph, when amount of -files in tree of repository directory causes performance issues. +**It is crucial to understand that DVC might remove ignored files upon `dvc +run` or `dvc repro`. If they are not produced by a +[pipeline](/doc/get-started/pipeline) step, they can be deleted permanently.** + +Keep in mind that when you add entries to `.dvcignore` that affect one of the +existing outputs, its status will change and DVC will behave as if +the affected files were deleted. + +If DVC stumbles upon a `.dvcignore` file inside a dependency or an +output directory, it raises an error. Ignoring files inside such a +directory should be handled by a `.dvcignore` file in the upper levels of the +project tree.
## Syntax The same as for [`.gitignore`](https://git-scm.com/docs/gitignore). -## Example +## Examples: Modification of ignored data + +Let's see what happens when we modify an ignored file. + +```dvc +$ mkdir data +$ echo data1 >> data/data1 +$ echo data2 >> data/data2 +$ tree . + +. +└── data + ├── data1 + └── data2 +``` + +We created the `data` directory. Let's ignore part of the `data` directory and add it under +DVC control. + +```dvc +$ echo data/data1 >> .dvcignore +$ cat .dvcignore + +data/data1 + +$ dvc add data +$ tree .dvc/cache + +.dvc/cache +├── 54 +│   └── 40cb5e4c57ab54af68127492334a23.dir +└── ed + └── c3d3797971f12c7f5e1d106dd5cee2 +``` + +As we can see, `data1` has been ignored. The cache contains only one file entry (for +`data2`) and one dir entry (`data`). + +Now, let's modify `data1` and see if it affects `dvc status`. + +```dvc +$ dvc status + +Pipelines are up to date. Nothing to reproduce. + +$ echo "123" >> data/data1 +$ dvc status + +Pipelines are up to date. Nothing to reproduce. +``` + +The same modification applied to a non-ignored file will make `dvc status` report +the change: + +```dvc +$ echo "123" >> data/data2 +$ dvc status + +data.dvc: + changed outs: + modified: data +``` + +## Examples: Moving ignored data + +```dvc +$ mkdir data +$ echo data1 >> data/data1 +$ echo data2 >> data/data2 +$ tree . + +. +└── data + ├── data1 + └── data2 + +$ echo data/data1 >> .dvcignore +$ cat .dvcignore + +data/data1 + +$ dvc add data +``` + +If we move ignored data to a path that is not ignored, DVC will behave as if we modified the data directory by +adding a new file: + +```dvc +$ dvc status + +Pipelines are up to date. Nothing to reproduce. + +$ mv data/data1 data/data3 +$ dvc status + +data.dvc: + changed outs: + modified: data +``` + +## Examples: Ignore dvc controlled file Lets analyze an example project: