Skip to content

Commit f7cdada

Browse files
panjf2000gopherbot
authored andcommitted
internal,os: employ copy_file_range(2) for file-to-file copying on FreeBSD
FreeBSD 13.0 introduced the Linux-compatible copy_file_range(2) system call, we should make use of it. Ref: https://www.gnu.org/software/gnulib/manual/html_node/copy_005ffile_005frange.html https://reviews.freebsd.org/D20584?id=60021 https://man.freebsd.org/cgi/man.cgi?copy_file_range(2) Change-Id: I75edb5629717289c8887be436613d3a8b3820bdc Reviewed-on: https://go-review.googlesource.com/c/go/+/604655 Run-TryBot: Andy Pan <[email protected]> Reviewed-by: Carlos Amedee <[email protected]> TryBot-Result: Gopher Robot <[email protected]> LUCI-TryBot-Result: Go LUCI <[email protected]> Reviewed-by: Ian Lance Taylor <[email protected]> Auto-Submit: Ian Lance Taylor <[email protected]>
1 parent 2693f77 commit f7cdada

14 files changed

+389
-102
lines changed
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// Copyright 2024 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package poll
6+
7+
import (
8+
"internal/syscall/unix"
9+
"syscall"
10+
)
11+
12+
func supportCopyFileRange() bool {
13+
return unix.SupportCopyFileRange()
14+
}
15+
16+
// For best performance, call copy_file_range() with the largest len value
17+
// possible. It is interruptible on most file systems, so there is no penalty
18+
// for using very large len values, even SSIZE_MAX.
19+
const maxCopyFileRangeRound = 1<<31 - 1
20+
21+
// handleCopyFileRangeErr classifies the error returned by one round of
// copy_file_range(2) on FreeBSD.
//
// The boolean result reports whether the copy should be considered handled
// by copy_file_range(2); the error result is what the caller should surface.
// A false result with a nil error tells the caller to fall back to a more
// generic copy.
//
// copied and written are not consulted by this implementation; the Linux
// variant of this function, which has the same signature, does use them.
func handleCopyFileRangeErr(err error, copied, written int64) (bool, error) {
	// The copy_file_range(2) function first appeared in FreeBSD 13.0.
	// Go supports FreeBSD >= 12, so the system call may not be present.
	// CopyFileRange already checks for support (via
	// unix.SupportCopyFileRange) before it starts copying, but we still
	// check for ENOSYS here to guard against rare cases like
	// https://go.dev/issue/58592
	//
	// If we see ENOSYS, we have certainly not transferred any data, so we
	// can tell the caller that we couldn't handle the transfer and let
	// them fall back to more generic code.
	if err == syscall.ENOSYS {
		return false, nil
	}

	// For EFBIG, the copy exceeds the process's file size limit or the
	// maximum file size for the filesystem dst resides on; in this case,
	// we leave it to the generic copy.
	//
	// For EINVAL, there could be a few reasons:
	// 1. Either dst or src refers to a file object that
	// is not a regular file, for instance, a pipe.
	// 2. src and dst refer to the same file and byte ranges
	// overlap.
	// 3. The flags argument is not 0.
	// None of these cases should be considered handled by
	// copy_file_range(2) because there is no data transfer, so
	// just fall back to generic copy.
	//
	// EIO is treated the same way.
	if err == syscall.EFBIG || err == syscall.EINVAL || err == syscall.EIO {
		return false, nil
	}

	// Anything else, including a nil error, counts as handled and is
	// passed through unchanged.
	return true, err
}

src/internal/poll/copy_file_range_linux.go

Lines changed: 50 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ import (
1010
"syscall"
1111
)
1212

13+
func supportCopyFileRange() bool {
14+
return isKernelVersionGE53()
15+
}
16+
1317
var isKernelVersionGE53 = sync.OnceValue(func() bool {
1418
major, minor := unix.KernelVersion()
1519
// copy_file_range(2) is broken in various ways on kernels older than 5.3,
@@ -20,102 +24,54 @@ var isKernelVersionGE53 = sync.OnceValue(func() bool {
2024

2125
const maxCopyFileRangeRound = 1 << 30
2226

23-
// CopyFileRange copies at most remain bytes of data from src to dst, using
24-
// the copy_file_range system call. dst and src must refer to regular files.
25-
func CopyFileRange(dst, src *FD, remain int64) (written int64, handled bool, err error) {
26-
if !isKernelVersionGE53() {
27-
return 0, false, nil
28-
}
29-
30-
for remain > 0 {
31-
max := remain
32-
if max > maxCopyFileRangeRound {
33-
max = maxCopyFileRangeRound
34-
}
35-
n, err := copyFileRange(dst, src, int(max))
36-
switch err {
37-
case syscall.ENOSYS:
38-
// copy_file_range(2) was introduced in Linux 4.5.
39-
// Go supports Linux >= 2.6.33, so the system call
40-
// may not be present.
41-
//
42-
// If we see ENOSYS, we have certainly not transferred
43-
// any data, so we can tell the caller that we
44-
// couldn't handle the transfer and let them fall
45-
// back to more generic code.
46-
return 0, false, nil
47-
case syscall.EXDEV, syscall.EINVAL, syscall.EIO, syscall.EOPNOTSUPP, syscall.EPERM:
48-
// Prior to Linux 5.3, it was not possible to
49-
// copy_file_range across file systems. Similarly to
50-
// the ENOSYS case above, if we see EXDEV, we have
51-
// not transferred any data, and we can let the caller
52-
// fall back to generic code.
53-
//
54-
// As for EINVAL, that is what we see if, for example,
55-
// dst or src refer to a pipe rather than a regular
56-
// file. This is another case where no data has been
57-
// transferred, so we consider it unhandled.
58-
//
59-
// If src and dst are on CIFS, we can see EIO.
60-
// See issue #42334.
61-
//
62-
// If the file is on NFS, we can see EOPNOTSUPP.
63-
// See issue #40731.
64-
//
65-
// If the process is running inside a Docker container,
66-
// we might see EPERM instead of ENOSYS. See issue
67-
// #40893. Since EPERM might also be a legitimate error,
68-
// don't mark copy_file_range(2) as unsupported.
69-
return 0, false, nil
70-
case nil:
71-
if n == 0 {
72-
// If we did not read any bytes at all,
73-
// then this file may be in a file system
74-
// where copy_file_range silently fails.
75-
// https://lore.kernel.org/linux-fsdevel/[email protected]/T/#m05753578c7f7882f6e9ffe01f981bc223edef2b0
76-
if written == 0 {
77-
return 0, false, nil
78-
}
79-
// Otherwise src is at EOF, which means
80-
// we are done.
81-
return written, true, nil
27+
func handleCopyFileRangeErr(err error, copied, written int64) (bool, error) {
28+
switch err {
29+
case syscall.ENOSYS:
30+
// copy_file_range(2) was introduced in Linux 4.5.
31+
// Go supports Linux >= 2.6.33, so the system call
32+
// may not be present.
33+
//
34+
// If we see ENOSYS, we have certainly not transferred
35+
// any data, so we can tell the caller that we
36+
// couldn't handle the transfer and let them fall
37+
// back to more generic code.
38+
return false, nil
39+
case syscall.EXDEV, syscall.EINVAL, syscall.EIO, syscall.EOPNOTSUPP, syscall.EPERM:
40+
// Prior to Linux 5.3, it was not possible to
41+
// copy_file_range across file systems. Similarly to
42+
// the ENOSYS case above, if we see EXDEV, we have
43+
// not transferred any data, and we can let the caller
44+
// fall back to generic code.
45+
//
46+
// As for EINVAL, that is what we see if, for example,
47+
// dst or src refer to a pipe rather than a regular
48+
// file. This is another case where no data has been
49+
// transferred, so we consider it unhandled.
50+
//
51+
// If src and dst are on CIFS, we can see EIO.
52+
// See issue #42334.
53+
//
54+
// If the file is on NFS, we can see EOPNOTSUPP.
55+
// See issue #40731.
56+
//
57+
// If the process is running inside a Docker container,
58+
// we might see EPERM instead of ENOSYS. See issue
59+
// #40893. Since EPERM might also be a legitimate error,
60+
// don't mark copy_file_range(2) as unsupported.
61+
return false, nil
62+
case nil:
63+
if copied == 0 {
64+
// If we did not read any bytes at all,
65+
// then this file may be in a file system
66+
// where copy_file_range silently fails.
67+
// https://lore.kernel.org/linux-fsdevel/[email protected]/T/#m05753578c7f7882f6e9ffe01f981bc223edef2b0
68+
if written == 0 {
69+
return false, nil
8270
}
83-
remain -= n
84-
written += n
85-
default:
86-
return written, true, err
87-
}
88-
}
89-
return written, true, nil
90-
}
9171

92-
// copyFileRange performs one round of copy_file_range(2).
93-
func copyFileRange(dst, src *FD, max int) (written int64, err error) {
94-
// The signature of copy_file_range(2) is:
95-
//
96-
// ssize_t copy_file_range(int fd_in, loff_t *off_in,
97-
// int fd_out, loff_t *off_out,
98-
// size_t len, unsigned int flags);
99-
//
100-
// Note that in the call to unix.CopyFileRange below, we use nil
101-
// values for off_in and off_out. For the system call, this means
102-
// "use and update the file offsets". That is why we must acquire
103-
// locks for both file descriptors (and why this whole machinery is
104-
// in the internal/poll package to begin with).
105-
if err := dst.writeLock(); err != nil {
106-
return 0, err
107-
}
108-
defer dst.writeUnlock()
109-
if err := src.readLock(); err != nil {
110-
return 0, err
111-
}
112-
defer src.readUnlock()
113-
var n int
114-
for {
115-
n, err = unix.CopyFileRange(src.Sysfd, nil, dst.Sysfd, nil, max, 0)
116-
if err != syscall.EINTR {
117-
break
72+
// Otherwise src is at EOF, which means
73+
// we are done.
11874
}
11975
}
120-
return int64(n), err
76+
return true, err
12177
}
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
// Copyright 2024 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
//go:build freebsd || linux
6+
7+
package poll
8+
9+
import (
10+
"internal/syscall/unix"
11+
"syscall"
12+
)
13+
14+
// CopyFileRange copies at most remain bytes of data from src to dst, using
15+
// the copy_file_range system call. dst and src must refer to regular files.
16+
func CopyFileRange(dst, src *FD, remain int64) (written int64, handled bool, err error) {
17+
if !supportCopyFileRange() {
18+
return 0, false, nil
19+
}
20+
21+
for remain > 0 {
22+
max := remain
23+
if max > maxCopyFileRangeRound {
24+
max = maxCopyFileRangeRound
25+
}
26+
n, e := copyFileRange(dst, src, int(max))
27+
if e == nil {
28+
remain -= n
29+
written += n
30+
}
31+
handled, err = handleCopyFileRangeErr(e, n, written)
32+
if n == 0 || !handled || err != nil {
33+
return
34+
}
35+
}
36+
37+
return written, true, nil
38+
}
39+
40+
// copyFileRange performs one round of copy_file_range(2).
41+
func copyFileRange(dst, src *FD, max int) (written int64, err error) {
42+
// For Linux, the signature of copy_file_range(2) is:
43+
//
44+
// ssize_t copy_file_range(int fd_in, loff_t *off_in,
45+
// int fd_out, loff_t *off_out,
46+
// size_t len, unsigned int flags);
47+
//
48+
// For FreeBSD, the signature of copy_file_range(2) is:
49+
//
50+
// ssize_t
51+
// copy_file_range(int infd, off_t *inoffp, int outfd, off_t *outoffp,
52+
// size_t len, unsigned int flags);
53+
//
54+
// Note that in the call to unix.CopyFileRange below, we use nil
55+
// values for off_in/off_out and inoffp/outoffp, which means "the file
56+
// offset for infd(fd_in) or outfd(fd_out) respectively will be used and
57+
// updated by the number of bytes copied".
58+
//
59+
// That is why we must acquire locks for both file descriptors (and why
60+
// this whole machinery is in the internal/poll package to begin with).
61+
if err := dst.writeLock(); err != nil {
62+
return 0, err
63+
}
64+
defer dst.writeUnlock()
65+
if err := src.readLock(); err != nil {
66+
return 0, err
67+
}
68+
defer src.readUnlock()
69+
var n int
70+
for {
71+
n, err = unix.CopyFileRange(src.Sysfd, nil, dst.Sysfd, nil, max, 0)
72+
if err != syscall.EINTR {
73+
break
74+
}
75+
}
76+
return int64(n), err
77+
}

src/internal/syscall/unix/copy_file_range_linux.go renamed to src/internal/syscall/unix/copy_file_range_unix.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
// Use of this source code is governed by a BSD-style
33
// license that can be found in the LICENSE file.
44

5+
//go:build freebsd || linux
6+
57
package unix
68

79
import (
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// Copyright 2024 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package unix
6+
7+
import (
8+
"sync"
9+
"syscall"
10+
)
11+
12+
// KernelVersion returns major and minor kernel version numbers
13+
// parsed from the syscall.Sysctl("kern.osrelease")'s value,
14+
// or (0, 0) if the version can't be obtained or parsed.
15+
func KernelVersion() (major, minor int) {
16+
release, err := syscall.Sysctl("kern.osrelease")
17+
if err != nil {
18+
return 0, 0
19+
}
20+
21+
parseNext := func() (n int) {
22+
for i, c := range release {
23+
if c == '.' {
24+
release = release[i+1:]
25+
return
26+
}
27+
if '0' <= c && c <= '9' {
28+
n = n*10 + int(c-'0')
29+
}
30+
}
31+
release = ""
32+
return
33+
}
34+
35+
major = parseNext()
36+
minor = parseNext()
37+
38+
return
39+
}
40+
41+
// SupportCopyFileRange reports whether the kernel supports the copy_file_range(2).
42+
// This function will examine both the kernel version and the availability of the system call.
43+
var SupportCopyFileRange = sync.OnceValue(func() bool {
44+
// The copy_file_range() function first appeared in FreeBSD 13.0.
45+
major, _ := KernelVersion()
46+
_, err := CopyFileRange(0, nil, 0, nil, 0, 0)
47+
return major >= 13 && err != syscall.ENOSYS
48+
})
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
// Copyright 2024 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package unix_test
6+
7+
import (
8+
"internal/syscall/unix"
9+
"syscall"
10+
"testing"
11+
)
12+
13+
func TestSupportCopyFileRange(t *testing.T) {
14+
major, minor := unix.KernelVersion()
15+
t.Logf("Running on FreeBSD %d.%d\n", major, minor)
16+
17+
_, err := unix.CopyFileRange(0, nil, 0, nil, 0, 0)
18+
want := err != syscall.ENOSYS
19+
got := unix.SupportCopyFileRange()
20+
if want != got {
21+
t.Fatalf("SupportCopyFileRange, got %t; want %t", got, want)
22+
}
23+
}

src/internal/syscall/unix/kernel_version_linux.go

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,9 @@ import (
88
"syscall"
99
)
1010

11-
// KernelVersion returns major and minor kernel version numbers, parsed from
12-
// the syscall.Uname's Release field, or 0, 0 if the version can't be obtained
13-
// or parsed.
14-
//
15-
// Currently only implemented for Linux.
11+
// KernelVersion returns major and minor kernel version numbers
12+
// parsed from the syscall.Uname's Release field, or (0, 0) if
13+
// the version can't be obtained or parsed.
1614
func KernelVersion() (major, minor int) {
1715
var uname syscall.Utsname
1816
if err := syscall.Uname(&uname); err != nil {

src/internal/syscall/unix/kernel_version_other.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// Use of this source code is governed by a BSD-style
33
// license that can be found in the LICENSE file.
44

5-
//go:build !linux && !solaris
5+
//go:build !freebsd && !linux && !solaris
66

77
package unix
88

0 commit comments

Comments
 (0)