Skip to content

Commit 3e53c92

Browse files
committed
Fix unicode errors on Windows in path_is_dir, path_exists, getcwd and rust_localtime.
This makes these functions use the wchar_t versions of the APIs instead of the char versions.
1 parent a1848bc commit 3e53c92

File tree

5 files changed

+148
-9
lines changed

5 files changed

+148
-9
lines changed

src/libstd/libc.rs

+10-4
Original file line numberDiff line numberDiff line change
@@ -1208,7 +1208,7 @@ pub mod consts {
12081208
pub static ERROR_ALREADY_EXISTS : c_int = 183;
12091209
pub static ERROR_INVALID_ADDRESS : c_int = 487;
12101210
pub static ERROR_FILE_INVALID : c_int = 1006;
1211-
pub static INVALID_HANDLE_VALUE: c_int = -1;
1211+
pub static INVALID_HANDLE_VALUE : c_int = -1;
12121212

12131213
pub static DELETE : DWORD = 0x00010000;
12141214
pub static READ_CONTROL : DWORD = 0x00020000;
@@ -3352,11 +3352,14 @@ pub mod funcs {
33523352
LPSECURITY_ATTRIBUTES)
33533353
-> BOOL;
33543354
pub fn CopyFileW(lpExistingFileName: LPCWSTR,
3355-
lpNewFileName: LPCWSTR,
3356-
bFailIfExists: BOOL)
3357-
-> BOOL;
3355+
lpNewFileName: LPCWSTR,
3356+
bFailIfExists: BOOL)
3357+
-> BOOL;
33583358
pub fn DeleteFileW(lpPathName: LPCWSTR) -> BOOL;
33593359
pub fn RemoveDirectoryW(lpPathName: LPCWSTR) -> BOOL;
3360+
pub fn GetCurrentDirectoryW(nBufferLength: DWORD,
3361+
lpBuffer: LPWSTR)
3362+
-> DWORD;
33603363
pub fn SetCurrentDirectoryW(lpPathName: LPCWSTR) -> BOOL;
33613364
pub fn GetLastError() -> DWORD;
33623365
pub fn FindFirstFileW(fileName: *u16, findFileData: HANDLE)
@@ -3462,6 +3465,9 @@ pub mod funcs {
34623465
-> BOOL;
34633466
pub fn DeleteFileW(lpPathName: LPCWSTR) -> BOOL;
34643467
pub fn RemoveDirectoryW(lpPathName: LPCWSTR) -> BOOL;
3468+
pub fn GetCurrentDirectoryW(nBufferLength: DWORD,
3469+
lpBuffer: LPWSTR)
3470+
-> DWORD;
34653471
pub fn SetCurrentDirectoryW(lpPathName: LPCWSTR) -> BOOL;
34663472
pub fn GetLastError() -> DWORD;
34673473
pub fn FindFirstFileW(fileName: *u16, findFileData: HANDLE)

src/libstd/os.rs

+86-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@
2828

2929
#[allow(missing_doc)];
3030

31-
use c_str::{CString, ToCStr};
31+
#[cfg(unix)]
32+
use c_str::CString;
3233
use clone::Clone;
3334
use container::Container;
3435
use io;
@@ -56,6 +57,11 @@ pub fn close(fd: c_int) -> c_int {
5657
}
5758
}
5859

60+
// On Windows, the wide-character versions of functions must be used to support
61+
// Unicode, so such functions should be split into at least two versions,
62+
// one for Windows and one for non-Windows, where necessary.
63+
// See https://github.com/mozilla/rust/issues/9822 for more information.
64+
5965
pub mod rustrt {
6066
use libc::{c_char, c_int};
6167
use libc;
@@ -64,11 +70,19 @@ pub mod rustrt {
6470
pub fn rust_path_is_dir(path: *libc::c_char) -> c_int;
6571
pub fn rust_path_exists(path: *libc::c_char) -> c_int;
6672
}
73+
74+
// Windows-only runtime helpers that take a UTF-16 path pointer (`*u16`)
// instead of a `*libc::c_char` one.
// Uses _wstat instead of stat.
75+
#[cfg(windows)]
76+
extern {
77+
// Both functions return a `c_int`; callers in this file compare it with 0.
pub fn rust_path_is_dir_u16(path: *u16) -> c_int;
78+
pub fn rust_path_exists_u16(path: *u16) -> c_int;
79+
}
6780
}
6881

6982
pub static TMPBUF_SZ : uint = 1000u;
7083
static BUF_BYTES : uint = 2048u;
7184

85+
#[cfg(unix)]
7286
pub fn getcwd() -> Path {
7387
#[fixed_stack_segment]; #[inline(never)];
7488
let mut buf = [0 as libc::c_char, ..BUF_BYTES];
@@ -83,6 +97,22 @@ pub fn getcwd() -> Path {
8397
}
8498
}
8599

100+
/// Returns the current working directory (Windows version).
///
/// Fills a stack buffer of `BUF_BYTES` UTF-16 code units through
/// `GetCurrentDirectoryW`, calls `fail2!()` if that call returns 0, and
/// builds the `Path` from the buffer decoded with `str::from_utf16`.
#[cfg(windows)]
101+
pub fn getcwd() -> Path {
102+
#[fixed_stack_segment]; #[inline(never)];
103+
use libc::DWORD;
104+
// NOTE(review): the call below is spelled `libc::GetCurrentDirectoryW`,
// so this import looks unused -- confirm.
use libc::GetCurrentDirectoryW;
105+
// The array holds `BUF_BYTES` u16 elements (not bytes), all initialised to 0.
let mut buf = [0 as u16, ..BUF_BYTES];
106+
do buf.as_mut_buf |buf, len| {
107+
unsafe {
108+
// NOTE(review): only a return value of 0 is treated as failure. Per the
// Win32 documentation a return value larger than the buffer length means
// the buffer was too small; that case is not handled here -- confirm.
if libc::GetCurrentDirectoryW(len as DWORD, buf) == 0 as DWORD {
109+
fail2!();
110+
}
111+
}
112+
}
113+
// NOTE(review): the whole buffer is decoded, including the zero-filled
// elements after the path. Presumably the input should be cut at the first
// NUL (or at the returned length) -- TODO confirm how `str::from_utf16` and
// `Path::new` treat embedded NULs.
Path::new(str::from_utf16(buf))
114+
}
115+
86116
#[cfg(windows)]
87117
pub mod win32 {
88118
use libc;
@@ -613,6 +643,7 @@ pub fn walk_dir(p: &Path, f: &fn(&Path) -> bool) -> bool {
613643
})
614644
}
615645
646+
#[cfg(unix)]
616647
/// Indicates whether a path represents a directory
617648
pub fn path_is_dir(p: &Path) -> bool {
618649
#[fixed_stack_segment]; #[inline(never)];
@@ -623,6 +654,18 @@ pub fn path_is_dir(p: &Path) -> bool {
623654
}
624655
}
625656
657+
658+
/// Indicates whether a path represents a directory (Windows version).
///
/// Converts the path to UTF-16 with `os::win32::as_utf16_p` and returns
/// `true` when the runtime helper `rust_path_is_dir_u16` reports a non-zero
/// value for it.
#[cfg(windows)]
659+
pub fn path_is_dir(p: &Path) -> bool {
660+
#[fixed_stack_segment]; #[inline(never)];
661+
unsafe {
662+
// NOTE(review): `as_str()` is unwrapped unconditionally; presumably this
// fails for a path that is not valid UTF-8 -- confirm.
do os::win32::as_utf16_p(p.as_str().unwrap()) |buf| {
663+
rustrt::rust_path_is_dir_u16(buf) != 0 as c_int
664+
}
665+
}
666+
}
667+
668+
#[cfg(unix)]
626669
/// Indicates whether a path exists
627670
pub fn path_exists(p: &Path) -> bool {
628671
#[fixed_stack_segment]; #[inline(never)];
@@ -633,6 +676,16 @@ pub fn path_exists(p: &Path) -> bool {
633676
}
634677
}
635678
679+
/// Indicates whether a path exists (Windows version).
///
/// Converts the path to UTF-16 with `os::win32::as_utf16_p` and returns
/// `true` when the runtime helper `rust_path_exists_u16` reports a non-zero
/// value for it.
#[cfg(windows)]
680+
pub fn path_exists(p: &Path) -> bool {
681+
#[fixed_stack_segment]; #[inline(never)];
682+
unsafe {
683+
// NOTE(review): `as_str()` is unwrapped unconditionally; presumably this
// fails for a path that is not valid UTF-8 -- confirm.
do os::win32::as_utf16_p(p.as_str().unwrap()) |buf| {
684+
rustrt::rust_path_exists_u16(buf) != 0 as c_int
685+
}
686+
}
687+
}
688+
636689
/**
637690
* Convert a relative path to an absolute path
638691
*
@@ -1922,15 +1975,47 @@ mod tests {
19221975
19231976
// Checks `os::path_is_dir` against ASCII paths and against a freshly created
// directory and file whose names contain non-ASCII (Korean/CJK/kana)
// characters.
#[test]
19241977
fn path_is_dir() {
1978+
use rt::io::file::open;
1979+
use rt::io::{OpenOrCreate, Read};
1980+
19251981
assert!((os::path_is_dir(&Path::new("."))));
19261982
assert!((!os::path_is_dir(&Path::new("test/stdtest/fs.rs"))));
1983+
1984+
// Build a randomised directory name under the temp dir so repeated runs
// do not collide.
let mut dirpath = os::tmpdir();
1985+
dirpath.push(format!("rust-test-{}/test-\uac00\u4e00\u30fc\u4f60\u597d",
1986+
rand::random::<u32>())); // 가一ー你好
1987+
debug2!("path_is_dir dirpath: {}", dirpath.display());
1988+
1989+
// NOTE(review): the mkdir result is only logged, never asserted; a failed
// mkdir would surface as a failure of the `path_is_dir` assert below.
let mkdir_result = os::mkdir_recursive(&dirpath, (S_IRUSR | S_IWUSR | S_IXUSR) as i32);
1990+
debug2!("path_is_dir mkdir_result: {}", mkdir_result);
1991+
1992+
assert!((os::path_is_dir(&dirpath)));
1993+
1994+
let mut filepath = dirpath;
1995+
filepath.push("unicode-file-\uac00\u4e00\u30fc\u4f60\u597d.rs");
1996+
debug2!("path_is_dir filepath: {}", filepath.display());
1997+
1998+
// NOTE(review): nothing in this test removes the directory or file it
// creates -- confirm whether cleanup is expected elsewhere.
open(&filepath, OpenOrCreate, Read); // ignore return; touch only
1999+
assert!((!os::path_is_dir(&filepath)));
2000+
2001+
// A relative path that is never created in this test must not be reported
// as a directory.
assert!((!os::path_is_dir(&Path::new(
2002+
"test/unicode-bogus-dir-\uac00\u4e00\u30fc\u4f60\u597d"))));
19272003
}
19282004
19292005
// Checks `os::path_exists` against ASCII paths and against a freshly created
// directory whose name contains non-ASCII (Korean/CJK/kana) characters.
#[test]
19302006
fn path_exists() {
19312007
assert!((os::path_exists(&Path::new("."))));
19322008
assert!((!os::path_exists(&Path::new(
19332009
"test/nonexistent-bogus-path"))));
2010+
2011+
// Build a randomised directory name under the temp dir so repeated runs
// do not collide.
let mut dirpath = os::tmpdir();
2012+
dirpath.push(format!("rust-test-{}/test-\uac01\u4e01\u30fc\u518d\u89c1",
2013+
rand::random::<u32>())); // 각丁ー再见
2014+
2015+
// NOTE(review): the mkdir result is discarded; a failed mkdir would surface
// as a failure of the `path_exists` assert below. Nothing here removes the
// directory afterwards.
os::mkdir_recursive(&dirpath, (S_IRUSR | S_IWUSR | S_IXUSR) as i32);
2016+
assert!((os::path_exists(&dirpath)));
2017+
// A relative path that is never created in this test must not be reported
// as existing.
assert!((!os::path_exists(&Path::new(
2018+
"test/unicode-bogus-path-\uac01\u4e01\u30fc\u518d\u89c1"))));
19342019
}
19352020
19362021
#[test]

src/rt/rust_builtin.cpp

+42-4
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ rust_list_dir_wfd_fp_buf(void* wfd) {
107107
#endif
108108

109109
extern "C" CDECL int
110-
rust_path_is_dir(char *path) {
110+
rust_path_is_dir(const char *path) {
111111
struct stat buf;
112112
if (stat(path, &buf)) {
113113
return 0;
@@ -116,14 +116,48 @@ rust_path_is_dir(char *path) {
116116
}
117117

118118
// Wide-character counterpart of rust_path_is_dir.
// With __WIN32__ defined: calls _wstat on the path and returns 0 if that
// fails, otherwise the result of S_ISDIR on the reported mode.
// Otherwise: a stub that ignores its argument and always returns 0.
extern "C" CDECL int
119-
rust_path_exists(char *path) {
119+
#if defined(__WIN32__)
120+
rust_path_is_dir_u16(const wchar_t *path) {
121+
struct _stat buf;
122+
// Don't use GetFileAttributesW, it cannot get attributes of
123+
// some system files (e.g. pagefile.sys).
124+
// _wstat returning non-zero is treated as "not a directory".
if (_wstat(path, &buf)) {
125+
return 0;
126+
}
127+
return S_ISDIR(buf.st_mode);
128+
}
129+
#else
130+
rust_path_is_dir_u16(const void *path) {
131+
// Wide version of function is only used on Windows.
132+
return 0;
133+
}
134+
#endif
135+
136+
// Returns 1 when stat() succeeds on the given path and 0 when it fails.
extern "C" CDECL int
137+
rust_path_exists(const char *path) {
120138
struct stat buf;
121139
// Any stat() failure is reported as "does not exist"; the specific error
// is not inspected.
if (stat(path, &buf)) {
122140
return 0;
123141
}
124142
return 1;
125143
}
126144

145+
// Wide-character counterpart of rust_path_exists.
// With __WIN32__ defined: returns 1 when _wstat succeeds on the path and 0
// when it fails.
// Otherwise: a stub that ignores its argument and always returns 0.
extern "C" CDECL int
146+
#if defined(__WIN32__)
147+
rust_path_exists_u16(const wchar_t *path) {
148+
struct _stat buf;
149+
// Any _wstat failure is reported as "does not exist"; the specific error
// is not inspected.
if (_wstat(path, &buf)) {
150+
return 0;
151+
}
152+
return 1;
153+
}
154+
#else
155+
rust_path_exists_u16(const void *path) {
156+
// Wide version of function is only used on Windows.
157+
return 0;
158+
}
159+
#endif
160+
127161
extern "C" CDECL FILE* rust_get_stdin() {return stdin;}
128162
extern "C" CDECL FILE* rust_get_stdout() {return stdout;}
129163
extern "C" CDECL FILE* rust_get_stderr() {return stderr;}
@@ -294,8 +328,12 @@ rust_localtime(int64_t sec, int32_t nsec, rust_tm *timeptr) {
294328
const char* zone = NULL;
295329
#if defined(__WIN32__)
296330
int32_t gmtoff = -timezone;
297-
char buffer[64];
298-
if (strftime(buffer, sizeof(buffer), "%Z", &tm) > 0) {
331+
wchar_t wbuffer[64];
332+
char buffer[256];
333+
// strftime("%Z") can return non-UTF-8 characters in non-English locales (issue #9418),
334+
// so the time zone name is converted from the UTF-16 string produced by wcsftime.
335+
if (wcsftime(wbuffer, sizeof(wbuffer) / sizeof(wchar_t), L"%Z", &tm) > 0) {
336+
WideCharToMultiByte(CP_UTF8, 0, wbuffer, -1, buffer, sizeof(buffer), NULL, NULL);
299337
zone = buffer;
300338
}
301339
#else

src/rt/rust_globals.h

+8
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,14 @@
5454
#include <assert.h>
5555

5656
#if defined(__WIN32__)
57+
// Prevent unnecessary #include's from <windows.h>
58+
#ifndef WIN32_LEAN_AND_MEAN
59+
#define WIN32_LEAN_AND_MEAN
60+
#endif
61+
// Prevent defining min and max macro
62+
#ifndef NOMINMAX
63+
#define NOMINMAX
64+
#endif
5765
extern "C" {
5866
#include <windows.h>
5967
#include <tchar.h>

src/rt/rustrt.def.in

+2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ rust_timegm
1010
rust_mktime
1111
precise_time_ns
1212
rust_path_is_dir
13+
rust_path_is_dir_u16
1314
rust_path_exists
15+
rust_path_exists_u16
1416
rust_get_stdin
1517
rust_get_stdout
1618
rust_get_stderr

0 commit comments

Comments
 (0)