Skip to content

Commit e67b009

Browse files
hknielsencopybara-github
authored and committed
Proper fix utf8 command line arguments (#14253)
#14197 tried to fix the UTF-8 issue, but it didn't handle multibyte chars. The only way I found that works consistently is using `CommandLineToArgvW`. To avoid rippling `wchar_t` out through the code, I convert to and from it where needed. Closes #14253 COPYBARA_INTEGRATE_REVIEW=#14253 from hknielsen:proper-fix-none-ascii-issue cad753e FUTURE_COPYBARA_INTEGRATE_REVIEW=#14253 from hknielsen:proper-fix-none-ascii-issue cad753e PiperOrigin-RevId: 599826579
1 parent f0ccf26 commit e67b009

File tree

4 files changed

+43
-3
lines changed

4 files changed

+43
-3
lines changed

build_defs/cpp_opts.bzl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ LINK_OPTS = select({
3636
"//build_defs:config_msvc": [
3737
# Suppress linker warnings about files with no symbols defined.
3838
"-ignore:4221",
39+
"Shell32.lib",
3940
],
4041
"@platforms//os:macos": [
4142
"-lpthread",

src/google/protobuf/compiler/command_line_interface.cc

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1766,10 +1766,23 @@ bool CommandLineInterface::MakeInputsBeProtoPathRelative(
17661766

17671767

17681768
bool CommandLineInterface::ExpandArgumentFile(
1769-
const std::string& file, std::vector<std::string>* arguments) {
1769+
const char* file, std::vector<std::string>* arguments) {
1770+
// On Windows, to make ifstream open UTF-8 file names correctly, we need to convert to
1771+
// a wide-character string (std::wstring). If we don't, the file can't be opened.
1772+
#ifdef _MSC_VER
1773+
// Convert the file name to wide chars.
1774+
int size = MultiByteToWideChar(CP_UTF8, 0, file, strlen(file), NULL, 0);
1775+
std::wstring file_str;
1776+
file_str.resize(size);
1777+
MultiByteToWideChar(CP_UTF8, 0, file, strlen(file), &file_str[0],
1778+
file_str.size());
1779+
#else
1780+
std::string file_str(file);
1781+
#endif
1782+
17701783
// The argument file is searched in the working directory only. We don't
17711784
// use the proto import path here.
1772-
std::ifstream file_stream(file.c_str());
1785+
std::ifstream file_stream(file_str.c_str());
17731786
if (!file_stream.is_open()) {
17741787
return false;
17751788
}

src/google/protobuf/compiler/command_line_interface.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ class PROTOC_EXPORT CommandLineInterface {
240240

241241
// Read an argument file and append the file's content to the list of
242242
// arguments. Return false if the file cannot be read.
243-
bool ExpandArgumentFile(const std::string& file,
243+
bool ExpandArgumentFile(const char* file,
244244
std::vector<std::string>* arguments);
245245

246246
// Parses a command-line argument into a name/value pair. Returns

src/google/protobuf/compiler/main.cc

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@
2121
// Must be included last.
2222
#include "google/protobuf/port_def.inc"
2323

24+
#ifdef _MSC_VER
25+
#include <windows.h>
26+
#endif
27+
2428
namespace google {
2529
namespace protobuf {
2630
namespace compiler {
@@ -101,6 +105,28 @@ int ProtobufMain(int argc, char* argv[]) {
101105
} // namespace protobuf
102106
} // namespace google
103107

108+
#ifdef _MSC_VER
109+
std::string ToMultiByteUtf8String(const wchar_t* input) {
110+
int size =
111+
WideCharToMultiByte(CP_UTF8, 0, input, wcslen(input), 0, 0, NULL, NULL);
112+
std::string result(size, 0);
113+
if (size)
114+
WideCharToMultiByte(CP_UTF8, 0, input, wcslen(input), &result[0], size,
115+
NULL, NULL);
116+
return result;
117+
}
118+
104119
int main(int argc, char* argv[]) {
120+
wchar_t** wargv = CommandLineToArgvW(GetCommandLineW(), &argc);
121+
char** argv_mbcs = new char*[argc];
122+
for (int i = 0; i < argc; i++) {
123+
std::string* multibyte_string = new auto(ToMultiByteUtf8String(wargv[i]));
124+
argv_mbcs[i] = const_cast<char*>(multibyte_string->c_str());
125+
}
105126
return google::protobuf::compiler::ProtobufMain(argc, argv);
106127
}
128+
#else
129+
int main(int argc, char* argv[]) {
130+
return google::protobuf::compiler::ProtobufMain(argc, argv);
131+
}
132+
#endif

0 commit comments

Comments
 (0)