summaryrefslogtreecommitdiff
path: root/src/os
diff options
context:
space:
mode:
author: qmuntal <quimmuntal@gmail.com> 2023-05-05 18:17:18 +0200
committer: Quim Muntal <quimmuntal@gmail.com> 2023-05-15 09:26:16 +0000
commit: 974236bda9b9aad87b4b10ec9af2cc01b14e382f (patch)
tree: 321cb16cc9fc2eec78fc81060ff64db06115a544 /src/os
parent: 91b8cc0dfaae12af1a89e2b7ad3da10728883ee1 (diff)
download: go-git-974236bda9b9aad87b4b10ec9af2cc01b14e382f.tar.gz
os, syscall: support ill-formed UTF-16 strings on Windows
Windows UTF-16 strings can contain unpaired surrogates, which can't be decoded into a valid UTF-8 string. This file defines a set of functions that can be used to encode and decode potentially ill-formed UTF-16 strings by using [the WTF-8 encoding](https://simonsapin.github.io/wtf-8/). WTF-8 is a strict superset of UTF-8, i.e. any string that is well-formed in UTF-8 is also well-formed in WTF-8 and the content is unchanged. Also, the conversion never fails and is lossless. The benefit of using WTF-8 instead of UTF-8 when decoding a UTF-16 string is that the conversion is lossless even for ill-formed UTF-16 strings. This property makes it possible to read an ill-formed UTF-16 string, convert it to a Go string, and convert it back to the same original UTF-16 string. Fixes #59971 Change-Id: Id6007f6e537844913402b233e73d698688cd5ba6 Reviewed-on: https://go-review.googlesource.com/c/go/+/493036 TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Bryan Mills <bcmills@google.com> Run-TryBot: Quim Muntal <quimmuntal@gmail.com> Reviewed-by: Cherry Mui <cherryyz@google.com> Reviewed-by: Paul Hampson <Paul.Hampson@Pobox.com>
Diffstat (limited to 'src/os')
-rw-r--r-- src/os/dir_windows.go 3
-rw-r--r-- src/os/exec/lp_windows_test.go 3
-rw-r--r-- src/os/file_windows.go 3
-rw-r--r-- src/os/os_windows_test.go 69
4 files changed, 72 insertions, 6 deletions
diff --git a/src/os/dir_windows.go b/src/os/dir_windows.go
index cee05cc729..7792d03040 100644
--- a/src/os/dir_windows.go
+++ b/src/os/dir_windows.go
@@ -11,7 +11,6 @@ import (
"runtime"
"sync"
"syscall"
- "unicode/utf16"
"unsafe"
)
@@ -104,7 +103,7 @@ func (file *File) readdir(n int, mode readdirMode) (names []string, dirents []Di
d.bufp = 0
}
nameslice := unsafe.Slice(&info.FileName[0], info.FileNameLength/2)
- name := string(utf16.Decode(nameslice))
+ name := syscall.UTF16ToString(nameslice)
if name == "." || name == ".." { // Useless names
continue
}
diff --git a/src/os/exec/lp_windows_test.go b/src/os/exec/lp_windows_test.go
index 50d522948a..4d85a5f415 100644
--- a/src/os/exec/lp_windows_test.go
+++ b/src/os/exec/lp_windows_test.go
@@ -587,7 +587,6 @@ package main
import (
"os"
"syscall"
- "unicode/utf16"
"unsafe"
)
@@ -599,7 +598,7 @@ func getMyName() (string, error) {
if n == 0 {
return "", err
}
- return string(utf16.Decode(b[0:n])), nil
+ return syscall.UTF16ToString(b[0:n]), nil
}
func main() {
diff --git a/src/os/file_windows.go b/src/os/file_windows.go
index f5a436e235..37db3f931c 100644
--- a/src/os/file_windows.go
+++ b/src/os/file_windows.go
@@ -11,7 +11,6 @@ import (
"runtime"
"sync"
"syscall"
- "unicode/utf16"
"unsafe"
)
@@ -259,7 +258,7 @@ func tempDir() string {
// Otherwise remove terminating \.
n--
}
- return string(utf16.Decode(b[:n]))
+ return syscall.UTF16ToString(b[:n])
}
}
diff --git a/src/os/os_windows_test.go b/src/os/os_windows_test.go
index 21a8c21d1e..fbc8cc1b9f 100644
--- a/src/os/os_windows_test.go
+++ b/src/os/os_windows_test.go
@@ -18,6 +18,7 @@ import (
"path/filepath"
"reflect"
"runtime"
+ "slices"
"sort"
"strings"
"syscall"
@@ -1377,3 +1378,71 @@ func TestAppExecLinkStat(t *testing.T) {
t.Errorf("exec.LookPath(%q) = %q; want %q", pythonPath, p, pythonPath)
}
}
+
+func TestIllformedUTF16FileName(t *testing.T) {
+ dir := t.TempDir()
+ const sep = string(os.PathSeparator)
+ if !strings.HasSuffix(dir, sep) {
+ dir += sep
+ }
+
+ // This UTF-16 file name is ill-formed as it contains low surrogates that are not preceded by high surrogates ([1:5]).
+ namew := []uint16{0x2e, 0xdc6d, 0xdc73, 0xdc79, 0xdc73, 0x30, 0x30, 0x30, 0x31, 0}
+
+ // Create a file whose name contains unpaired surrogates.
+ // Use syscall.CreateFile instead of os.Create to simulate a file that is created by
+ // a non-Go program so the file name hasn't gone through syscall.UTF16FromString.
+ dirw := utf16.Encode([]rune(dir))
+ pathw := append(dirw, namew...)
+ fd, err := syscall.CreateFile(&pathw[0], syscall.GENERIC_ALL, 0, nil, syscall.CREATE_NEW, 0, 0)
+ if err != nil {
+ t.Fatal(err)
+ }
+ syscall.CloseHandle(fd)
+
+ name := syscall.UTF16ToString(namew)
+ path := filepath.Join(dir, name)
+ // Verify that os.Lstat can query the file.
+ fi, err := os.Lstat(path)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if got := fi.Name(); got != name {
+ t.Errorf("got %q, want %q", got, name)
+ }
+ // Verify that File.Readdirnames lists the file.
+ f, err := os.Open(dir)
+ if err != nil {
+ t.Fatal(err)
+ }
+ files, err := f.Readdirnames(0)
+ f.Close()
+ if err != nil {
+ t.Fatal(err)
+ }
+ if !slices.Contains(files, name) {
+ t.Error("file not listed")
+ }
+ // Verify that os.RemoveAll can remove the directory
+ // and that it doesn't hang.
+ err = os.RemoveAll(dir)
+ if err != nil {
+ t.Error(err)
+ }
+}
+
+func TestUTF16Alloc(t *testing.T) {
+ allowsPerRun := func(want int, f func()) {
+ t.Helper()
+ got := int(testing.AllocsPerRun(5, f))
+ if got != want {
+ t.Errorf("got %d allocs, want %d", got, want)
+ }
+ }
+ allowsPerRun(1, func() {
+ syscall.UTF16ToString([]uint16{'a', 'b', 'c'})
+ })
+ allowsPerRun(1, func() {
+ syscall.UTF16FromString("abc")
+ })
+}