From 28fc566a991a04795a9920bdddb7f3a7374b11c9 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Thu, 20 Apr 2023 13:33:02 -0600 Subject: [PATCH 1/6] fs: make readdir recursive algorithm iterative --- lib/fs.js | 92 ++++++++++++++++++++++++++++-------------- lib/internal/fs/dir.js | 2 - 2 files changed, 61 insertions(+), 33 deletions(-) diff --git a/lib/fs.js b/lib/fs.js index 99a8ce0f764778..e4afbbefa5326e 100644 --- a/lib/fs.js +++ b/lib/fs.js @@ -26,6 +26,7 @@ const { ArrayPrototypePush, + ArrayPrototypeShift, BigIntPrototypeToString, MathMax, Number, @@ -140,7 +141,6 @@ const { validateObject, validateString, } = require('internal/validators'); - let truncateWarn = true; let fs; @@ -1404,34 +1404,64 @@ function mkdirSync(path, options) { } } -// TODO(Ethan-Arrowood): Make this iterative too -function readdirSyncRecursive(path, origPath, options) { - nullCheck(path, 'path', true); - const ctx = { path }; - const result = binding.readdir(pathModule.toNamespacedPath(path), - options.encoding, !!options.withFileTypes, undefined, ctx); - handleErrorFromBinding(ctx); - return options.withFileTypes ? - getDirents(path, result).flatMap((dirent) => { - return [ - dirent, - ...(dirent.isDirectory() ? - readdirSyncRecursive( - pathModule.join(path, dirent.name), - origPath, - options, - ) : []), - ]; - }) : - result.flatMap((ent) => { - const innerPath = pathModule.join(path, ent); - const relativePath = pathModule.relative(origPath, innerPath); - const stat = binding.internalModuleStat(innerPath); - return [ - relativePath, - ...(stat === 1 ? readdirSyncRecursive(innerPath, origPath, options) : []), - ]; - }); +/** + * An iterative algorithm for reading the entire contents of the `basePath` directory. + * This function does not validate `basePath` as a directory. It is passed directly to + * `binding.readdir` after a `nullCheck`. + * @param {string} basePath + * @param {{ encoding: string, withFileTypes: boolean }} options + * @returns {Array | Array} + */ +function readdirSyncRecursive(basePath, options) { + nullCheck(basePath, 'path', true); + + const results = []; + const opsQueue = []; + + function _read(_path) { + const ctx = { _path }; + const result = binding.readdir( + pathModule.toNamespacedPath(_path), + options.encoding, + !!options.withFileTypes, + undefined, + ctx, + ); + handleErrorFromBinding(ctx); + + if (options.withFileTypes) { + const dirents = getDirents(_path, result); + for (let i = 0; i < dirents.length; i++) { + const dirent = dirents[i]; + ArrayPrototypePush(results, dirent); + if (dirent.isDirectory()) { + ArrayPrototypePush(opsQueue, curryRead(pathModule.join(dirent.path, dirent.name))); + } + } + } else { + for (let i = 0; i < result.length; i++) { + const ent = result[i]; + const innerPath = pathModule.join(_path, ent); + const relativePath = pathModule.relative(basePath, innerPath); + const stat = binding.internalModuleStat(innerPath); + ArrayPrototypePush(results, relativePath); + if (stat === 1) { + ArrayPrototypePush(opsQueue, curryRead(innerPath)); + } + } + } + } + + const curryRead = (p) => () => _read(p); + + ArrayPrototypePush(opsQueue, curryRead(basePath)); + + while (opsQueue.length !== 0) { + const op = ArrayPrototypeShift(opsQueue); + op(); + } + + return results; } /** @@ -1456,7 +1486,7 @@ function readdir(path, options, callback) { } if (options.recursive) { - callback(null, readdirSyncRecursive(path, path, options)); + callback(null, readdirSyncRecursive(path, options)); return; } @@ -1494,7 +1524,7 @@ function readdirSync(path, options) { } if (options.recursive) { - return readdirSyncRecursive(path, path, options); + return readdirSyncRecursive(path, options); } const ctx = { path }; diff --git a/lib/internal/fs/dir.js b/lib/internal/fs/dir.js index 6a9114adace50b..ec0562843d5f5c 100644 --- a/lib/internal/fs/dir.js +++ b/lib/internal/fs/dir.js @@ -160,8 +160,6 @@ class Dir { } } - // TODO(Ethan-Arrowood): Review this implementation. Make it iterative. - // Can we better leverage the `kDirOperationQueue`? readSyncRecursive(dirent) { const ctx = { path: dirent.path }; const handle = dirBinding.opendir( From f15aa53f45bc9894918696c4597909e99018abc6 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Thu, 20 Apr 2023 16:21:46 -0600 Subject: [PATCH 2/6] review suggestions --- lib/fs.js | 50 ++++++++++++++++++++++++-------------------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/lib/fs.js b/lib/fs.js index e4afbbefa5326e..c9370d24ac3715 100644 --- a/lib/fs.js +++ b/lib/fs.js @@ -28,6 +28,7 @@ const { ArrayPrototypePush, ArrayPrototypeShift, BigIntPrototypeToString, + Boolean, MathMax, Number, ObjectDefineProperties, @@ -141,6 +142,7 @@ const { validateObject, validateString, } = require('internal/validators'); + let truncateWarn = true; let fs; @@ -1410,58 +1412,54 @@ function mkdirSync(path, options) { * `binding.readdir` after a `nullCheck`. * @param {string} basePath * @param {{ encoding: string, withFileTypes: boolean }} options - * @returns {Array | Array} + * @returns {string[] | Dirent[]} */ function readdirSyncRecursive(basePath, options) { nullCheck(basePath, 'path', true); - const results = []; - const opsQueue = []; + const readdirResults = []; + const pathsQueue = [basePath]; - function _read(_path) { - const ctx = { _path }; - const result = binding.readdir( - pathModule.toNamespacedPath(_path), + function read(path) { + const ctx = { path }; + const readdirResult = binding.readdir( + pathModule.toNamespacedPath(path), options.encoding, - !!options.withFileTypes, + Boolean(options.withFileTypes), undefined, ctx, ); handleErrorFromBinding(ctx); if (options.withFileTypes) { - const dirents = getDirents(_path, result); + const dirents = getDirents(path, readdirResult); for (let i = 0; i < dirents.length; i++) { const dirent = dirents[i]; - ArrayPrototypePush(results, dirent); + ArrayPrototypePush(readdirResults, dirent); if (dirent.isDirectory()) { - ArrayPrototypePush(opsQueue, curryRead(pathModule.join(dirent.path, dirent.name))); + ArrayPrototypePush(pathsQueue, pathModule.join(dirent.path, dirent.name)); } } } else { - for (let i = 0; i < result.length; i++) { - const ent = result[i]; - const innerPath = pathModule.join(_path, ent); - const relativePath = pathModule.relative(basePath, innerPath); - const stat = binding.internalModuleStat(innerPath); - ArrayPrototypePush(results, relativePath); + for (let i = 0; i < readdirResult.length; i++) { + const resultPath = pathModule.join(path, readdirResult[i]); + const relativeResultPath = pathModule.relative(basePath, resultPath); + const stat = binding.internalModuleStat(resultPath); + ArrayPrototypePush(readdirResults, relativeResultPath); + // 1 indicates directory if (stat === 1) { - ArrayPrototypePush(opsQueue, curryRead(innerPath)); + ArrayPrototypePush(pathsQueue, resultPath); } } } } - const curryRead = (p) => () => _read(p); - - ArrayPrototypePush(opsQueue, curryRead(basePath)); - - while (opsQueue.length !== 0) { - const op = ArrayPrototypeShift(opsQueue); - op(); + while (pathsQueue.length > 0) { + const path = ArrayPrototypeShift(pathsQueue); + read(path); } - return results; + return readdirResults; } /** From 57f16be97f8c1c4b26743718976f69ddb39b36a1 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Fri, 21 Apr 2023 06:47:57 -0600 Subject: [PATCH 3/6] Update lib/fs.js Co-authored-by: mscdex --- lib/fs.js | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/fs.js b/lib/fs.js index c9370d24ac3715..0cf7227d6fe904 100644 --- a/lib/fs.js +++ b/lib/fs.js @@ -1454,10 +1454,8 @@ function readdirSyncRecursive(basePath, options) { } } - while (pathsQueue.length > 0) { - const path = ArrayPrototypeShift(pathsQueue); - read(path); - } + while (pathsQueue.length > 0) + read(ArrayPrototypeShift(pathsQueue)); return readdirResults; } From 36bad0f84ff787190f2c57a5eeb8ff10503aca31 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Mon, 24 Apr 2023 13:54:38 -0600 Subject: [PATCH 4/6] Update lib/fs.js Co-authored-by: Antoine du Hamel --- lib/fs.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/fs.js b/lib/fs.js index 0cf7227d6fe904..8156605a4b413f 100644 --- a/lib/fs.js +++ b/lib/fs.js @@ -1454,8 +1454,9 @@ function readdirSyncRecursive(basePath, options) { } } - while (pathsQueue.length > 0) - read(ArrayPrototypeShift(pathsQueue)); + for (let i = 0; i < pathsQueue.length; i++) { + read(pathsQueue[i]); + } return readdirResults; } From 880316529fbc8549977a73ef85d3fc630d3a86ff Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Tue, 25 Apr 2023 10:47:06 -0600 Subject: [PATCH 5/6] Update lib/fs.js Co-authored-by: Antoine du Hamel --- lib/fs.js | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/fs.js b/lib/fs.js index 8156605a4b413f..deb8fdf094799e 100644 --- a/lib/fs.js +++ b/lib/fs.js @@ -26,7 +26,6 @@ const { ArrayPrototypePush, - ArrayPrototypeShift, BigIntPrototypeToString, Boolean, MathMax, From 679d8cd6bda0d0724ffdabf43f7fb4535c1c1732 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Tue, 25 Apr 2023 11:48:05 -0600 Subject: [PATCH 6/6] make it even faster --- lib/fs.js | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/lib/fs.js b/lib/fs.js index deb8fdf094799e..c4598dd82037d2 100644 --- a/lib/fs.js +++ b/lib/fs.js @@ -98,6 +98,7 @@ const { copyObject, Dirent, emitRecursiveRmdirWarning, + getDirent, getDirents, getOptions, getValidatedFd, @@ -1416,31 +1417,32 @@ function mkdirSync(path, options) { function readdirSyncRecursive(basePath, options) { nullCheck(basePath, 'path', true); + const withFileTypes = Boolean(options.withFileTypes); + const encoding = options.encoding; + const readdirResults = []; const pathsQueue = [basePath]; + const ctx = { path: basePath }; function read(path) { - const ctx = { path }; + ctx.path = path; const readdirResult = binding.readdir( pathModule.toNamespacedPath(path), - options.encoding, - Boolean(options.withFileTypes), + encoding, + withFileTypes, undefined, ctx, ); handleErrorFromBinding(ctx); - if (options.withFileTypes) { - const dirents = getDirents(path, readdirResult); - for (let i = 0; i < dirents.length; i++) { - const dirent = dirents[i]; + for (let i = 0; i < readdirResult.length; i++) { + if (withFileTypes) { + const dirent = getDirent(path, readdirResult[0][i], readdirResult[1][i]); ArrayPrototypePush(readdirResults, dirent); if (dirent.isDirectory()) { ArrayPrototypePush(pathsQueue, pathModule.join(dirent.path, dirent.name)); } - } - } else { - for (let i = 0; i < readdirResult.length; i++) { + } else { const resultPath = pathModule.join(path, readdirResult[i]); const relativeResultPath = pathModule.relative(basePath, resultPath); const stat = binding.internalModuleStat(resultPath);