From 7ae193d19f15a92578a7e7cc088e320a65788df0 Mon Sep 17 00:00:00 2001 From: Antoine du Hamel Date: Fri, 18 Oct 2024 09:20:43 +0200 Subject: [PATCH] url: handle "unsafe" characters properly in `pathToFileURL` Co-authored-by: EarlyRiser42 PR-URL: https://github.com/nodejs/node/pull/54545 Fixes: https://github.com/nodejs/node/issues/54515 Reviewed-By: Benjamin Gruenbaum Reviewed-By: James M Snell Reviewed-By: Matteo Collina --- lib/internal/process/execution.js | 2 +- lib/internal/url.js | 94 +++++++++++++++---------- test/parallel/test-url-pathtofileurl.js | 13 ++-- 3 files changed, 67 insertions(+), 42 deletions(-) diff --git a/lib/internal/process/execution.js b/lib/internal/process/execution.js index 7834ce9aa74e8c..68b267b61c39b1 100644 --- a/lib/internal/process/execution.js +++ b/lib/internal/process/execution.js @@ -49,7 +49,7 @@ function tryGetCwd() { let evalIndex = 0; function getEvalModuleUrl() { - return pathToFileURL(`${process.cwd()}/[eval${++evalIndex}]`).href; + return `${pathToFileURL(process.cwd())}/[eval${++evalIndex}]`; } /** diff --git a/lib/internal/url.js b/lib/internal/url.js index b62766b02987d1..b05b75ac1c809c 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -1498,44 +1498,75 @@ function fileURLToPath(path, options = kEmptyObject) { return (windows ?? isWindows) ? getPathFromURLWin32(path) : getPathFromURLPosix(path); } -// The following characters are percent-encoded when converting from file path -// to URL: -// - %: The percent character is the only character not encoded by the -// `pathname` setter. -// - \: Backslash is encoded on non-windows platforms since it's a valid -// character but the `pathname` setters replaces it by a forward slash. -// - LF: The newline character is stripped out by the `pathname` setter. -// (See whatwg/url#419) -// - CR: The carriage return character is also stripped out by the `pathname` -// setter. -// - TAB: The tab character is also stripped out by the `pathname` setter. +// RFC1738 defines the following chars as "unsafe" for URLs +// @see https://www.ietf.org/rfc/rfc1738.txt 2.2. URL Character Encoding Issues const percentRegEx = /%/g; -const backslashRegEx = /\\/g; const newlineRegEx = /\n/g; const carriageReturnRegEx = /\r/g; const tabRegEx = /\t/g; -const questionRegex = /\?/g; +const quoteRegEx = /"/g; const hashRegex = /#/g; +const spaceRegEx = / /g; +const questionMarkRegex = /\?/g; +const openSquareBracketRegEx = /\[/g; +const backslashRegEx = /\\/g; +const closeSquareBracketRegEx = /]/g; +const caretRegEx = /\^/g; +const verticalBarRegEx = /\|/g; +const tildeRegEx = /~/g; function encodePathChars(filepath, options = kEmptyObject) { - const windows = options?.windows; - if (StringPrototypeIndexOf(filepath, '%') !== -1) + if (StringPrototypeIncludes(filepath, '%')) { filepath = RegExpPrototypeSymbolReplace(percentRegEx, filepath, '%25'); - // In posix, backslash is a valid character in paths: - if (!(windows ?? isWindows) && StringPrototypeIndexOf(filepath, '\\') !== -1) - filepath = RegExpPrototypeSymbolReplace(backslashRegEx, filepath, '%5C'); - if (StringPrototypeIndexOf(filepath, '\n') !== -1) + } + + if (StringPrototypeIncludes(filepath, '\t')) { + filepath = RegExpPrototypeSymbolReplace(tabRegEx, filepath, '%09'); + } + if (StringPrototypeIncludes(filepath, '\n')) { filepath = RegExpPrototypeSymbolReplace(newlineRegEx, filepath, '%0A'); - if (StringPrototypeIndexOf(filepath, '\r') !== -1) + } + if (StringPrototypeIncludes(filepath, '\r')) { filepath = RegExpPrototypeSymbolReplace(carriageReturnRegEx, filepath, '%0D'); - if (StringPrototypeIndexOf(filepath, '\t') !== -1) - filepath = RegExpPrototypeSymbolReplace(tabRegEx, filepath, '%09'); + } + if (StringPrototypeIncludes(filepath, ' ')) { + filepath = RegExpPrototypeSymbolReplace(spaceRegEx, filepath, '%20'); + } + if (StringPrototypeIncludes(filepath, '"')) { + filepath = RegExpPrototypeSymbolReplace(quoteRegEx, filepath, '%22'); + } + if (StringPrototypeIncludes(filepath, '#')) { + filepath = RegExpPrototypeSymbolReplace(hashRegex, filepath, '%23'); + } + if (StringPrototypeIncludes(filepath, '?')) { + filepath = RegExpPrototypeSymbolReplace(questionMarkRegex, filepath, '%3F'); + } + if (StringPrototypeIncludes(filepath, '[')) { + filepath = RegExpPrototypeSymbolReplace(openSquareBracketRegEx, filepath, '%5B'); + } + // Back-slashes must be special-cased on Windows, where they are treated as path separator. + if (!options.windows && StringPrototypeIncludes(filepath, '\\')) { + filepath = RegExpPrototypeSymbolReplace(backslashRegEx, filepath, '%5C'); + } + if (StringPrototypeIncludes(filepath, ']')) { + filepath = RegExpPrototypeSymbolReplace(closeSquareBracketRegEx, filepath, '%5D'); + } + if (StringPrototypeIncludes(filepath, '^')) { + filepath = RegExpPrototypeSymbolReplace(caretRegEx, filepath, '%5E'); + } + if (StringPrototypeIncludes(filepath, '|')) { + filepath = RegExpPrototypeSymbolReplace(verticalBarRegEx, filepath, '%7C'); + } + if (StringPrototypeIncludes(filepath, '~')) { + filepath = RegExpPrototypeSymbolReplace(tildeRegEx, filepath, '%7E'); + } + return filepath; } function pathToFileURL(filepath, options = kEmptyObject) { - const windows = options?.windows; - if ((windows ?? isWindows) && StringPrototypeStartsWith(filepath, '\\\\')) { + const windows = options?.windows ?? isWindows; + if (windows && StringPrototypeStartsWith(filepath, '\\\\')) { const outURL = new URL('file://'); // UNC path format: \\server\share\resource // Handle extended UNC path and standard UNC path @@ -1566,20 +1597,9 @@ function pathToFileURL(filepath, options = kEmptyObject) { ); return outURL; } - let resolved = (windows ?? isWindows) ? path.win32.resolve(filepath) : path.posix.resolve(filepath); - - // Call encodePathChars first to avoid encoding % again for ? and #. - resolved = encodePathChars(resolved, { windows }); + const resolved = windows ? path.win32.resolve(filepath) : path.posix.resolve(filepath); - // Question and hash character should be included in pathname. - // Therefore, encoding is required to eliminate parsing them in different states. - // This is done as an optimization to not creating a URL instance and - // later triggering pathname setter, which impacts performance - if (StringPrototypeIndexOf(resolved, '?') !== -1) - resolved = RegExpPrototypeSymbolReplace(questionRegex, resolved, '%3F'); - if (StringPrototypeIndexOf(resolved, '#') !== -1) - resolved = RegExpPrototypeSymbolReplace(hashRegex, resolved, '%23'); - return new URL(`file://${resolved}`); + return new URL(`file://${encodePathChars(resolved, { windows })}`); } function toPathIfFileURL(fileURLOrPath) { diff --git a/test/parallel/test-url-pathtofileurl.js b/test/parallel/test-url-pathtofileurl.js index 20609eb0ff5c9f..9c506e353f49e5 100644 --- a/test/parallel/test-url-pathtofileurl.js +++ b/test/parallel/test-url-pathtofileurl.js @@ -13,10 +13,7 @@ const url = require('url'); { const fileURL = url.pathToFileURL('test\\').href; assert.ok(fileURL.startsWith('file:///')); - if (isWindows) - assert.ok(fileURL.endsWith('/')); - else - assert.ok(fileURL.endsWith('%5C')); + assert.match(fileURL, isWindows ? /\/$/ : /%5C$/); } { @@ -104,6 +101,12 @@ const windowsTestCases = [ { path: 'C:\\€', expected: 'file:///C:/%E2%82%AC' }, // Rocket emoji (non-BMP code point) { path: 'C:\\🚀', expected: 'file:///C:/%F0%9F%9A%80' }, + // caret + { path: 'C:\\foo^bar', expected: 'file:///C:/foo%5Ebar' }, + // left bracket + { path: 'C:\\foo[bar', expected: 'file:///C:/foo%5Bbar' }, + // right bracket + { path: 'C:\\foo]bar', expected: 'file:///C:/foo%5Dbar' }, // Local extended path { path: '\\\\?\\C:\\path\\to\\file.txt', expected: 'file:///C:/path/to/file.txt' }, // UNC path (see https://docs.microsoft.com/en-us/archive/blogs/ie/file-uris-in-windows) @@ -154,6 +157,8 @@ const posixTestCases = [ { path: '/€', expected: 'file:///%E2%82%AC' }, // Rocket emoji (non-BMP code point) { path: '/🚀', expected: 'file:///%F0%9F%9A%80' }, + // "unsafe" chars + { path: '/foo\r\n\t<>"#%{}|^[\\~]`?bar', expected: 'file:///foo%0D%0A%09%3C%3E%22%23%25%7B%7D%7C%5E%5B%5C%7E%5D%60%3Fbar' }, ]; for (const { path, expected } of windowsTestCases) {