Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

src: use wide string for findPackageJson on Windows #55861

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 0 additions & 49 deletions src/node_file.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3125,55 +3125,6 @@ static void GetFormatOfExtensionlessFile(
return args.GetReturnValue().Set(EXTENSIONLESS_FORMAT_JAVASCRIPT);
}

#ifdef _WIN32
// Converts a UTF-8 encoded string to a UTF-16 wide string using the Win32
// MultiByteToWideChar API. Returns an empty wide string when `str` is empty
// or when the required length cannot be determined.
std::wstring ConvertToWideString(const std::string& str) {
  // MultiByteToWideChar treats a zero-length input as an error, so handle the
  // empty case up front (this mirrors the guard in ConvertWideToUTF8).
  if (str.empty()) return std::wstring();

  const int input_size = static_cast<int>(str.size());

  // First pass: a null output buffer asks the API for the required length,
  // measured in wide characters.
  const int size_needed =
      MultiByteToWideChar(CP_UTF8, 0, str.data(), input_size, nullptr, 0);
  // A return value of 0 signals failure; there is nothing to convert into.
  if (size_needed <= 0) return std::wstring();

  // Second pass: perform the actual conversion into the sized buffer.
  std::wstring wstrTo(size_needed, 0);
  MultiByteToWideChar(
      CP_UTF8, 0, str.data(), input_size, &wstrTo[0], size_needed);
  return wstrTo;
}

// Builds a std::filesystem::path from an object exposing ToString(): the
// string is decoded as UTF-8 into a wide string, which is then used to
// construct the path. The argument is parenthesised so that any expression
// can be passed safely.
#define BufferValueToPath(str)                                                 \
  std::filesystem::path(ConvertToWideString((str).ToString()))

// Encodes a UTF-16 wide string as UTF-8 through the Win32
// WideCharToMultiByte API. An empty input yields an empty result without
// calling the API.
std::string ConvertWideToUTF8(const std::wstring& wstr) {
  if (wstr.empty()) {
    return std::string();
  }

  const wchar_t* const source = &wstr[0];
  const int source_length = static_cast<int>(wstr.size());

  // Query pass: with no output buffer the call reports the byte count needed.
  const int utf8_length = WideCharToMultiByte(
      CP_UTF8, 0, source, source_length, nullptr, 0, nullptr, nullptr);

  // Conversion pass: fill a buffer of exactly that size.
  std::string utf8(utf8_length, 0);
  WideCharToMultiByte(CP_UTF8,
                      0,
                      source,
                      source_length,
                      &utf8[0],
                      utf8_length,
                      nullptr,
                      nullptr);
  return utf8;
}

// Converts a std::filesystem::path to a UTF-8 encoded std::string by way of
// its wide-string form. The macro body deliberately has no trailing
// semicolon so it can be used inside larger expressions.
#define PathToString(path) ConvertWideToUTF8((path).wstring())

#else // _WIN32

// Builds a std::filesystem::path from the string view exposed by `str`.
// Neither macro carries a trailing semicolon, so both can be used inside
// larger expressions; arguments are parenthesised for safety.
#define BufferValueToPath(str) std::filesystem::path((str).ToStringView())
// Yields the path's native string representation.
#define PathToString(path) (path).native()

#endif // _WIN32

static void CpSyncCheckPaths(const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args);
Isolate* isolate = env->isolate();
Expand Down
16 changes: 6 additions & 10 deletions src/node_modules.cc
Original file line number Diff line number Diff line change
Expand Up @@ -337,14 +337,12 @@ void BindingData::GetNearestParentPackageJSON(
bool slashCheck = path_value.ToStringView().ends_with(kPathSeparator);

ToNamespacedPath(realm->env(), &path_value);

std::string path_value_str = path_value.ToString();
std::filesystem::path path = BufferValueToPath(path_value);
if (slashCheck) {
path_value_str.push_back(kPathSeparator);
path += kPathSeparator;
}

auto package_json =
TraverseParent(realm, std::filesystem::path(path_value_str));
auto package_json = TraverseParent(realm, path);

if (package_json != nullptr) {
args.GetReturnValue().Set(package_json->Serialize(realm));
Expand All @@ -363,14 +361,12 @@ void BindingData::GetNearestParentPackageJSONType(
bool slashCheck = path_value.ToStringView().ends_with(kPathSeparator);

ToNamespacedPath(realm->env(), &path_value);

std::string path_value_str = path_value.ToString();
std::filesystem::path path = BufferValueToPath(path_value);
if (slashCheck) {
path_value_str.push_back(kPathSeparator);
path += kPathSeparator;
}

auto package_json =
TraverseParent(realm, std::filesystem::path(path_value_str));
auto package_json = TraverseParent(realm, path);

if (package_json == nullptr) {
return;
Expand Down
39 changes: 39 additions & 0 deletions src/util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -885,4 +885,43 @@ v8::Maybe<int> GetValidFileMode(Environment* env,
return v8::Just(mode);
}

#ifdef _WIN32
// Converts a UTF-8 encoded string to a UTF-16 wide string using the Win32
// MultiByteToWideChar API. Returns an empty wide string when `str` is empty
// or when the required length cannot be determined.
std::wstring ConvertToWideString(const std::string& str) {
  // MultiByteToWideChar treats a zero-length input as an error, so handle the
  // empty case up front (this mirrors the guard in ConvertWideToUTF8).
  if (str.empty()) return std::wstring();

  const int input_size = static_cast<int>(str.size());

  // First pass: a null output buffer asks the API for the required length,
  // measured in wide characters.
  const int size_needed =
      MultiByteToWideChar(CP_UTF8, 0, str.data(), input_size, nullptr, 0);
  // A return value of 0 signals failure; there is nothing to convert into.
  if (size_needed <= 0) return std::wstring();

  // Second pass: perform the actual conversion into the sized buffer.
  std::wstring wstrTo(size_needed, 0);
  MultiByteToWideChar(
      CP_UTF8, 0, str.data(), input_size, &wstrTo[0], size_needed);
  return wstrTo;
}

// Encodes a UTF-16 wide string as UTF-8 through the Win32
// WideCharToMultiByte API. An empty input yields an empty result without
// calling the API.
std::string ConvertWideToUTF8(const std::wstring& wstr) {
  if (wstr.empty()) {
    return std::string();
  }

  const wchar_t* const source = &wstr[0];
  const int source_length = static_cast<int>(wstr.size());

  // Query pass: with no output buffer the call reports the byte count needed.
  const int utf8_length = WideCharToMultiByte(
      CP_UTF8, 0, source, source_length, nullptr, 0, nullptr, nullptr);

  // Conversion pass: fill a buffer of exactly that size.
  std::string utf8(utf8_length, 0);
  WideCharToMultiByte(CP_UTF8,
                      0,
                      source,
                      source_length,
                      &utf8[0],
                      utf8_length,
                      nullptr,
                      nullptr);
  return utf8;
}

#endif // _WIN32

} // namespace node
22 changes: 22 additions & 0 deletions src/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@
#include <unordered_map>
#include <utility>
#include <vector>
#ifdef _WIN32
#include <windows.h>
#else
#include <unistd.h>
#endif

#ifdef __GNUC__
#define MUST_USE_RESULT __attribute__((warn_unused_result))
Expand Down Expand Up @@ -1013,6 +1018,23 @@ v8::Maybe<int> GetValidFileMode(Environment* env,
// case insensitive.
inline bool IsWindowsBatchFile(const char* filename);

#ifdef _WIN32
std::wstring ConvertToWideString(const std::string& str);

// Builds a std::filesystem::path from an object exposing ToString(): the
// string is passed through ConvertToWideString and the resulting wide string
// is used to construct the path. The argument is parenthesised so that any
// expression can be passed safely.
#define BufferValueToPath(str)                                                 \
  std::filesystem::path(ConvertToWideString((str).ToString()))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These macros are unnecessary. Also, str.ToString() makes an unnecessary copy, even though the function takes a const std::string&. Using str.ToStringView() would avoid that copy here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I modified the code to avoid using macros and to use std::string_view instead.


std::string ConvertWideToUTF8(const std::wstring& wstr);

// Converts a std::filesystem::path to a std::string by passing its
// wide-string form through ConvertWideToUTF8. The macro body deliberately has
// no trailing semicolon so it can be used inside larger expressions.
#define PathToString(path) ConvertWideToUTF8((path).wstring())

#else // _WIN32

// Builds a std::filesystem::path from the string view exposed by `str`.
// Neither macro carries a trailing semicolon, so both can be used inside
// larger expressions; arguments are parenthesised for safety.
#define BufferValueToPath(str) std::filesystem::path((str).ToStringView())
// Yields the path's native string representation.
#define PathToString(path) (path).native()

#endif // _WIN32

} // namespace node

#endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
Expand Down
1 change: 1 addition & 0 deletions test/fixtures/全角文字/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
console.log('check non-ascii');
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

During testing, it became evident that the occurrence of the error described in issue #55773 is influenced by the character encoding used by the user.

For instance, users working with CP1252 are unlikely to encounter any errors, regardless of the characters they use. In contrast, users with encodings like CP932 are more prone to experiencing errors.

Take the character "月" as an example, which is represented in UTF-8 as:
E6 9C 88

In CP932, the byte 88 is interpreted as the leading byte of a 2-byte character, potentially causing issues.

The key takeaway is that to ensure accurate regression testing, it might be necessary to test the runtime environment with character encodings other than CP1252.

I also looked for other tests that might include regression testing against Windows-specific character encodings but couldn’t find any. If you have any good ideas, I’d appreciate your input.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From what I understand, you’re looking for something along these lines:

const stdoutExec = execSync('@chcp 932 >nul & \"' + process.execPath + '\" \"' + nonAsciiPath + '\"', 
                              { encoding: 'utf8'});
assert.strictEqual(stdoutExec, 'check non-ascii\n');

However, I wanted to point out that this code does not fail when run on the main branch on my local machine. Because of this, it might not be ideal to use this approach directly without further investigation.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for your response.
I tried it as well, and it seems that the behavior was not affected.
It appears that we need to change the system's character encoding rather than just the console's encoding.

For instance, the following minimal C++ code reproduces the same error:

int main() {
  std::string path = "\x88";
  std::filesystem::path file(path);
}

This code alone triggers the same issue.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am currently investigating whether it is possible to change the system's character encoding within a cctest.

12 changes: 12 additions & 0 deletions test/parallel/test-non-ascii.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
'use strict';

const common = require('../common');
Copy link
Member

@RedYetiDev RedYetiDev Nov 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Common must be the first import. FWIW running make lint will catch this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for the lint error.
I've fixed it.

const path = require('path');
const { test } = require('node:test');
const assert = require('node:assert');

// Spawns the current Node.js binary on a fixture whose path contains
// non-ASCII characters and checks that the script's output is produced.
test('Running from a directory with non-ASCII characters', async () => {
  const fixture = path.resolve(__dirname, '../fixtures/全角文字/index.js');
  const result = await common.spawnPromisified(process.execPath, [fixture]);
  assert.strictEqual(result.stdout, 'check non-ascii\n');
});
Loading