path: root/utils
diff options
authorTamar Christina <>2018-03-29 14:22:09 +0100
committerTamar Christina <>2018-03-31 10:11:53 +0100
commit4de585a5c1ac3edc2914cebcac1753b514051a89 (patch)
tree09bfb4251808007bb4ad79c6f10f3e4fbe3e9312 /utils
parentafb686a88901d7d0c93627806d7e4d0444aa17e8 (diff)
Remove MAX_PATH restrictions from RTS, I/O manager and various utilities
Summary: This shims out fopen and sopen so that they use modern APIs under the hood along with namespaced paths. This lifts the MAX_PATH restrictions from Haskell programs and makes the new limit ~32k. There are only some slight caveats that have been documented. Some utilities have not been upgraded such as lndir, since all these things are different cabal packages I have been forced to copy the source in different places which is less than ideal. But it's the only way to keep sdist working. Test Plan: ./validate Reviewers: hvr, bgamari, erikd, simonmar Reviewed By: bgamari Subscribers: rwbarton, thomie, carter GHC Trac Issues: #10822 Differential Revision:
Diffstat (limited to 'utils')
7 files changed, 355 insertions, 19 deletions
diff --git a/utils/fs/README b/utils/fs/README
new file mode 100644
index 0000000000..5011939a38
--- /dev/null
+++ b/utils/fs/README
@@ -0,0 +1,4 @@
+This "fs" library is used by various GHC utilities to share some common
+filesystem I/O functions between different packages.
+This file is copied across the build-system by configure.
diff --git a/utils/fs/fs.c b/utils/fs/fs.c
new file mode 100644
index 0000000000..24bf3a3632
--- /dev/null
+++ b/utils/fs/fs.c
@@ -0,0 +1,293 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) Tamar Christina 2018
+ *
+ * Windows I/O routines for file opening.
+ *
+ * NOTE: Only modify this file in utils/fs/ and rerun configure. Do not edit
+ * this file in any other directory as it will be overwritten.
+ *
+ * ---------------------------------------------------------------------------*/
+#include "fs.h"
+#include <stdio.h>
+#if defined(_WIN32)
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <windows.h>
+#include <io.h>
+#include <fcntl.h>
+#include <wchar.h>
+#include <sys\stat.h>
+#include <sys\types.h>
+#include <share.h>
+/* Convert a Windows path into its Win32 file-namespace form.
+
+   An explorer-style path is made absolute and prefixed with \\?\ (or with
+   \\?\UNC\ when it names a network share).  Paths that already start with
+   \\.\, \\?\ or \Device\ (e.g. raw device paths) are returned as an
+   unmodified copy.  The main benefit of doing any of this is that we can
+   break the MAX_PATH restriction and also access raw handles that we
+   couldn't before.  This has several caveats, but they are caveats that are
+   native to Windows and not POSIX; see Microsoft's "Naming Files, Paths,
+   and Namespaces" documentation.
+
+   Returns a newly allocated string that the caller must free, or NULL on
+   failure.  */
+static wchar_t* __hs_create_device_name (const wchar_t* filename) {
+  const wchar_t* win32_dev_namespace  = L"\\\\.\\";
+  const wchar_t* win32_file_namespace = L"\\\\?\\";
+  const wchar_t* nt_device_namespace  = L"\\Device\\";
+  const wchar_t* unc_prefix           = L"UNC\\";
+  const wchar_t* network_share        = L"\\\\";
+
+  wchar_t  ns[10] = {0};
+  wchar_t* temp   = NULL;
+  size_t   skip   = 0;
+  wchar_t* result = _wcsdup (filename);
+  if (!result)
+    return NULL;
+
+  /* If the file is already in a native namespace don't change it.  */
+  if (   wcsncmp (win32_dev_namespace , filename, 4) == 0
+      || wcsncmp (win32_file_namespace, filename, 4) == 0
+      || wcsncmp (nt_device_namespace , filename, 8) == 0)
+    return result;
+
+  /* Since we're using the lower level APIs we must normalize slashes now.  The
+     Win32 API layer will no longer convert '/' into '\\' for us.  */
+  for (wchar_t* p = result; *p != L'\0'; p++)
+    {
+      if (*p == L'/')
+        *p = L'\\';
+    }
+
+  /* Now resolve any . and .. in the path or subsequent API calls may fail since
+     Win32 will no longer resolve them.  The first call only asks for the
+     required buffer size; 0 means the path could not be resolved.  */
+  DWORD nResult = GetFullPathNameW (result, 0, NULL, NULL);
+  if (nResult == 0)
+    goto cleanup;
+  nResult += 1;
+
+  /* Hand the normalized input over to temp instead of duplicating it, so the
+     old buffer is not leaked when result is replaced.  */
+  temp   = result;
+  result = malloc (nResult * sizeof (wchar_t));
+  if (!result || GetFullPathNameW (temp, nResult, result, NULL) == 0)
+    goto cleanup;
+
+  free (temp);
+  temp = NULL; /* cleanup must not free it a second time.  */
+
+  if (wcsncmp (network_share, result, 2) == 0)
+    {
+      if (swprintf (ns, 10, L"%ls%ls", win32_file_namespace, unc_prefix) <= 0)
+        goto cleanup;
+      /* \\server\share must become \\?\UNC\server\share, so drop the two
+         leading backslashes of the share path.  */
+      skip = 2;
+    }
+  else if (swprintf (ns, 10, L"%ls", win32_file_namespace) <= 0)
+    {
+      goto cleanup;
+    }
+
+  /* Create new string.  */
+  size_t bLen = wcslen (ns) + wcslen (result) - skip + 1;
+  temp   = result;
+  result = malloc (bLen * sizeof (wchar_t));
+  if (!result || swprintf (result, bLen, L"%ls%ls", ns, temp + skip) <= 0)
+    goto cleanup;
+
+  free (temp);
+  return result;
+
+cleanup:
+  free (temp);
+  free (result);
+  return NULL;
+}
+#define HAS_FLAG(a,b) ((a & b) == b)
+int FS(swopen) (const wchar_t* filename, int oflag, int shflag, int pmode)
+ /* Open FILENAME with CreateFileW and return a CRT file descriptor for it,
+    or -1 on failure. The path is first rewritten by
+    __hs_create_device_name; OFLAG, SHFLAG and PMODE are tested for the
+    _O_, _SH_ and _S_ flags below and mapped onto the CreateFileW arguments.
+    HAS_FLAG (a, b) is true when every bit of b is set in a, so it is always
+    true for a flag whose value is 0 (presumably the case for _O_RDONLY in
+    the Microsoft CRT; confirm).
+    NOTE(review): several flag tests below have no visible statement after
+    them, and dwShareMode / dwFlagsAndAttributes are used without a visible
+    declaration; this listing looks truncated, so confirm against the
+    complete fs.c before relying on it.
+
+    Construct access mode. */
+ DWORD dwDesiredAccess = 0;
+ if (HAS_FLAG (oflag, _O_RDONLY))
+ if (HAS_FLAG (oflag, _O_RDWR))
+ if (HAS_FLAG (oflag, _O_WRONLY))
+ /* Construct shared mode. */
+ if (HAS_FLAG (shflag, _SH_DENYRW))
+ if (HAS_FLAG (shflag, _SH_DENYWR))
+ dwShareMode &= ~FILE_SHARE_WRITE;
+ if (HAS_FLAG (shflag, _SH_DENYRD))
+ dwShareMode &= ~FILE_SHARE_READ;
+ if (HAS_FLAG (pmode, _S_IWRITE))
+ if (HAS_FLAG (pmode, _S_IREAD))
+ dwShareMode |= FILE_SHARE_READ;
+ /* Override access mode with pmode if creating file. */
+ if (HAS_FLAG (oflag, _O_CREAT))
+ {
+ if (HAS_FLAG (pmode, _S_IWRITE))
+ dwDesiredAccess |= FILE_GENERIC_WRITE;
+ if (HAS_FLAG (pmode, _S_IREAD))
+ dwDesiredAccess |= FILE_GENERIC_READ;
+ }
+ /* Create file disposition. The default is OPEN_EXISTING; _O_CREAT selects
+    OPEN_ALWAYS, _O_CREAT together with _O_EXCL selects CREATE_NEW, and
+    _O_TRUNC without _O_CREAT selects TRUNCATE_EXISTING.
+    NOTE(review): _O_CREAT combined with _O_TRUNC keeps OPEN_ALWAYS, which
+    presumably does not truncate an existing file; confirm against the
+    CreateFileW documentation. */
+ DWORD dwCreationDisposition = OPEN_EXISTING;
+ if (HAS_FLAG (oflag, _O_CREAT))
+ dwCreationDisposition = OPEN_ALWAYS;
+ if (HAS_FLAG (oflag, (_O_CREAT | _O_EXCL)))
+ dwCreationDisposition = CREATE_NEW;
+ if (HAS_FLAG (oflag, _O_TRUNC) && !HAS_FLAG (oflag, _O_CREAT))
+ dwCreationDisposition = TRUNCATE_EXISTING;
+ /* Set file access attributes. */
+ if (HAS_FLAG (oflag, _O_RDONLY))
+ dwFlagsAndAttributes |= 0; /* No special attribute. */
+ if (HAS_FLAG (oflag, (_O_CREAT | _O_TEMPORARY)))
+ dwFlagsAndAttributes |= FILE_FLAG_DELETE_ON_CLOSE;
+ if (HAS_FLAG (oflag, (_O_CREAT | _O_SHORT_LIVED)))
+ dwFlagsAndAttributes |= FILE_ATTRIBUTE_TEMPORARY;
+ if (HAS_FLAG (oflag, _O_RANDOM))
+ dwFlagsAndAttributes |= FILE_FLAG_RANDOM_ACCESS;
+ if (HAS_FLAG (oflag, _O_SEQUENTIAL))
+ dwFlagsAndAttributes |= FILE_FLAG_SEQUENTIAL_SCAN;
+ /* FILE_ATTRIBUTE_NORMAL is only valid on its own, so clear it once any
+    other flag or attribute has been set. */
+ if (dwFlagsAndAttributes != FILE_ATTRIBUTE_NORMAL)
+ dwFlagsAndAttributes &= ~FILE_ATTRIBUTE_NORMAL;
+ /* Set security attributes. The handle is inheritable unless _O_NOINHERIT
+    was requested. */
+ SECURITY_ATTRIBUTES securityAttributes;
+ ZeroMemory (&securityAttributes, sizeof(SECURITY_ATTRIBUTES));
+ securityAttributes.bInheritHandle = !(oflag & _O_NOINHERIT);
+ securityAttributes.lpSecurityDescriptor = NULL;
+ securityAttributes.nLength = sizeof(SECURITY_ATTRIBUTES);
+ wchar_t* _filename = __hs_create_device_name (filename);
+ if (!_filename)
+ return -1;
+ HANDLE hResult
+ = CreateFileW (_filename, dwDesiredAccess, dwShareMode, &securityAttributes,
+ dwCreationDisposition, dwFlagsAndAttributes, NULL);
+ free (_filename);
+ if (INVALID_HANDLE_VALUE == hResult)
+ return -1;
+ /* Now we have a Windows handle, we have to convert it to an FD and apply
+ the remaining flags.
+    NOTE(review): when _open_osfhandle fails, hResult is returned from
+    without being closed. */
+ const int flag_mask = _O_APPEND | _O_RDONLY | _O_TEXT | _O_WTEXT;
+ int fd = _open_osfhandle ((intptr_t)hResult, oflag & flag_mask);
+ if (-1 == fd)
+ return -1;
+ /* Finally we can change the mode to the requested one.
+    NOTE(review): when _setmode fails, fd is left open. */
+ const int mode_mask = _O_TEXT | _O_BINARY | _O_U16TEXT | _O_U8TEXT | _O_WTEXT;
+ if ((oflag & mode_mask) && (-1 == _setmode (fd, oflag & mode_mask)))
+ return -1;
+ return fd;
+/* Wide-character fopen replacement that opens FILENAME through FS(swopen),
+   so the path is namespaced and not limited to MAX_PATH.
+
+   MODE uses the Microsoft fopen syntax: one of 'a', 'r' or 'w', an optional
+   '+' for update access, the modifiers 'b', 't', 'c', 'n', 'S', 'R', 'T'
+   and 'D', and an optional trailing "ccs=UNICODE", "ccs=UTF-8" or
+   "ccs=UTF-16LE".  Unknown characters are ignored.
+
+   Returns the opened stream, or NULL if the file could not be opened.  */
+FILE *FS(fwopen) (const wchar_t* filename, const wchar_t* mode)
+{
+  int shflag = 0;
+  int pmode  = 0;
+  int oflag  = 0;
+
+  /* A '+' anywhere in the mode ("r+", "rb+", "r+b") requests update access.
+     None of the supported encoding names contains a '+'.  */
+  const bool update = wcschr (mode, L'+') != NULL;
+
+  for (const wchar_t* p = mode; *p != L'\0'; p++)
+    {
+      /* The encoding must be recognised before the per-character switch,
+         otherwise its letters (the 'c' of "ccs", the 'T' of "UTF", the 'D'
+         of "UNICODE") would be misread as mode flags.  Nothing may follow
+         the encoding, so stop scanning afterwards.  */
+      if (wcsncmp (p, L"ccs=", 4) == 0)
+        {
+          const wchar_t* encoding = p + 4;
+          if (wcsncmp (encoding, L"UNICODE", 7) == 0)
+            oflag |= _O_WTEXT;
+          else if (wcsncmp (encoding, L"UTF-8", 5) == 0)
+            oflag |= _O_U8TEXT;
+          else if (wcsncmp (encoding, L"UTF-16LE", 8) == 0)
+            oflag |= _O_U16TEXT;
+          break;
+        }
+
+      switch (*p)
+        {
+          case L'a':
+            oflag |= (update ? _O_RDWR : _O_WRONLY) | _O_CREAT | _O_APPEND;
+            break;
+          case L'r':
+            oflag |= update ? _O_RDWR : _O_RDONLY;
+            break;
+          case L'w':
+            oflag |= (update ? _O_RDWR : _O_WRONLY) | _O_CREAT | _O_TRUNC;
+            break;
+          case L'b':
+            oflag |= _O_BINARY;
+            break;
+          case L't':
+            oflag |= _O_TEXT;
+            break;
+          case L'c':
+          case L'n':
+            /* Commit flags have no open-time equivalent.  */
+            break;
+          case L'S':
+            oflag |= _O_SEQUENTIAL;
+            break;
+          case L'R':
+            oflag |= _O_RANDOM;
+            break;
+          case L'T':
+            oflag |= _O_SHORT_LIVED;
+            break;
+          case L'D':
+            oflag |= _O_TEMPORARY;
+            break;
+          default:
+            break;
+        }
+    }
+
+  int fd = FS(swopen) (filename, oflag, shflag, pmode);
+  if (fd < 0)
+    return NULL;
+
+  FILE* file = _wfdopen (fd, mode);
+  if (!file)
+    _close (fd); /* Don't leak the descriptor when no stream owns it.  */
+  return file;
+}
+/* Narrow-character entry point: convert FILENAME and MODE to wide strings
+   with mbstowcs and forward to FS(fwopen).
+
+   Returns the opened stream, or NULL if a string cannot be converted,
+   memory is exhausted, or the file could not be opened.  */
+FILE *FS(fopen) (const char* filename, const char* mode)
+{
+  FILE    *result     = NULL;
+  wchar_t *w_filename = NULL;
+  wchar_t *w_mode     = NULL;
+
+  /* mbstowcs reports an invalid multibyte sequence as (size_t) -1; using
+     that as a length would wrap the allocation size and index out of
+     bounds.  */
+  size_t len = mbstowcs (NULL, filename, 0);
+  if (len == (size_t) -1)
+    goto cleanup;
+  w_filename = malloc (sizeof (wchar_t) * (len + 1));
+  if (!w_filename)
+    goto cleanup;
+  mbstowcs (w_filename, filename, len);
+  w_filename[len] = L'\0';
+
+  len = mbstowcs (NULL, mode, 0);
+  if (len == (size_t) -1)
+    goto cleanup;
+  w_mode = malloc (sizeof (wchar_t) * (len + 1));
+  if (!w_mode)
+    goto cleanup;
+  mbstowcs (w_mode, mode, len);
+  w_mode[len] = L'\0';
+
+  result = FS(fwopen) (w_filename, w_mode);
+
+cleanup:
+  free (w_filename);
+  free (w_mode);
+  return result;
+}
+#else
+/* On platforms other than Windows no path rewriting is done; simply forward
+   to the C library's fopen.  */
+FILE *FS(fopen) (const char* filename, const char* mode)
+{
+  return fopen (filename, mode);
+}
+#endif
diff --git a/utils/fs/fs.h b/utils/fs/fs.h
new file mode 100644
index 0000000000..ab2eded2a1
--- /dev/null
+++ b/utils/fs/fs.h
@@ -0,0 +1,36 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) Tamar Christina 2018
+ *
+ * Windows I/O routines for file opening.
+ *
+ * NOTE: Only modify this file in utils/fs/ and rerun configure. Do not edit
+ * this file in any other directory as it will be overwritten.
+ *
+ * ---------------------------------------------------------------------------*/
+#pragma once
+
+#include <stdio.h>
+
+/* Namespace used to build the exported symbol names.  It defaults to hs; a
+   package can define FS_NAMESPACE before including this header to pick a
+   different one.  */
+#if !defined(FS_NAMESPACE)
+#define FS_NAMESPACE hs
+#endif
+
+/* Play some dirty tricks to get CPP to expand correctly.  The extra level of
+   indirection makes FS_NAMESPACE expand before it is token-pasted, so with
+   the default namespace FS(fopen) becomes __hs_fopen.  */
+#define FS_FULL(ns, name) __##ns##_##name
+#define prefix FS_NAMESPACE
+#define FS_L(p, n) FS_FULL(p, n)
+#define FS(name) FS_L(prefix, name)
+
+#if defined(_WIN32)
+#include <wchar.h>
+
+/* Windows: file opening routines implemented in fs.c.  */
+int FS(swopen) (const wchar_t* filename, int oflag,
+                int shflag, int pmode);
+FILE *FS(fwopen) (const wchar_t* filename, const wchar_t* mode);
+FILE *FS(fopen) (const char* filename, const char* mode);
+#else
+
+/* Elsewhere only the fopen wrapper is provided.  */
+FILE *FS(fopen) (const char* filename, const char* mode);
+#endif
diff --git a/utils/lndir/lndir.c b/utils/lndir/lndir.c
index 87f2824166..8ea5ab2ab4 100644
--- a/utils/lndir/lndir.c
+++ b/utils/lndir/lndir.c
@@ -2,7 +2,7 @@
/* Create shadow link tree (after X11R4 script of the same name)
Mark Reinhold ( January 1990 */
Copyright (c) 1990, X Consortium
Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -46,6 +46,7 @@ in this Software without prior written authorization from the X Consortium.
#define NeedVarargsPrototypes 1
#include "lndir-Xos.h"
+#include "fs.h"
#include <stdlib.h>
#include <stdio.h>
#include <sys/stat.h>
@@ -182,11 +183,11 @@ int copyfile(const char *oldpath, const char *newpath) {
return symlink(oldpath, newpath);
} else {
- f_old = fopen(oldpath, "rb");
+ f_old = __hs_fopen(oldpath, "rb");
if (f_old == NULL) {
return -1;
- f_new = fopen(newpath, "wbx");
+ f_new = __hs_fopen(newpath, "wbx");
if (f_new == NULL) {
e = errno;
@@ -272,7 +273,7 @@ int rel; /* if true, prepend "../" to fn before using */
buf[0] = '\0';
strcat (buf, fn);
if (!(df = opendir (buf))) {
msg ("%s: Cannot opendir", buf);
return 1;
@@ -305,7 +306,7 @@ int rel; /* if true, prepend "../" to fn before using */
#if defined(S_ISDIR)
- if (sb.st_mode & S_IFDIR)
+ if (sb.st_mode & S_IFDIR)
/* directory */
@@ -397,7 +398,7 @@ int rel; /* if true, prepend "../" to fn before using */
mperror (dp->d_name);
closedir (df);
return 0;
@@ -410,7 +411,7 @@ char **av;
char* tn;
struct stat fs, ts;
#if defined(__CYGWIN32__)
- /*
+ /*
The lndir code assumes unix-style paths to work. cygwin
lets you get away with using dos'ish paths (e.g., "f:/oo")
in most contexts. Using them with 'lndir' will seriously
@@ -457,7 +458,7 @@ char **av;
if (stat (tn, &ts) < 0) {
if (force && (tn[0] != '.' || tn[1] != '\0') ) {
mymkdir(tn, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH );
- }
+ }
else {
quiterr (1, tn);
#if defined(S_ISDIR)
diff --git a/utils/unlit/ b/utils/unlit/
index 8911f4e856..0560aa57b8 100644
--- a/utils/unlit/
+++ b/utils/unlit/
@@ -11,7 +11,7 @@
# -----------------------------------------------------------------------------
# built by ghc-stage0
-utils/unlit_dist_C_SRCS = unlit.c
+utils/unlit_dist_C_SRCS = unlit.c fs.c
utils/unlit_dist_PROGNAME = unlit
utils/unlit_dist_TOPDIR = YES
utils/unlit_dist_INSTALL_INPLACE = YES
diff --git a/utils/unlit/unlit.c b/utils/unlit/unlit.c
index 4eb91d71be..97f853b268 100644
--- a/utils/unlit/unlit.c
+++ b/utils/unlit/unlit.c
@@ -7,7 +7,7 @@
* column on each line. It is hoped that this style of programming will
* encourage the writing of accurate and clearly documented programs
* in which the writer may include motivating arguments, examples
- * and explanations.
+ * and explanations.
* Unlit is a filter that can be used to strip all of the comment lines
* out of a literate script file. The command format for unlit is:
@@ -40,6 +40,7 @@
* And \begin{pseudocode} ... \end{pseudocode}. -- LA
+#include "fs.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
@@ -115,7 +116,7 @@ static void myputc(char c, FILE *ostream)
if (putc(c,ostream) == EOF) {
- }
+ }
#define TABPOS 8
@@ -179,7 +180,7 @@ static line readline(FILE *istream, FILE *ostream) {
if (c==EOF)
return ENDFILE;
if ( c == '#' ) {
if ( ignore_shebang ) {
c1 = egetc(istream);
@@ -335,10 +336,10 @@ int main(int argc,char **argv)
else if (strcmp(*argv,"-h")==0) {
if (argc > 1) {
argc--; argv++;
- if (prefix_str)
+ if (prefix_str)
prefix_str = (char*)malloc(sizeof(char)*(1+strlen(*argv)));
- if (prefix_str)
+ if (prefix_str)
strcpy(prefix_str, *argv);
} else if (strcmp(*argv,"-#")==0)
@@ -362,16 +363,16 @@ int main(int argc,char **argv)
file = "stdin";
- if ((istream=fopen(argv[0], "r")) == NULL) {
+ if ((istream=__hs_fopen(argv[0], "r")) == NULL) {
fprintf(stderr, CANNOTOPEN, argv[0]);
- if (strcmp(argv[1], "-")==0)
- ostream = stdout;
+ if (strcmp(argv[1], "-")==0)
+ ostream = stdout;
- if ((ostream=fopen(argv[1], "w")) == NULL) {
+ if ((ostream=__hs_fopen(argv[1], "w")) == NULL) {
fprintf(stderr, CANNOTOPEN, argv[1]);
diff --git a/utils/unlit/unlit.cabal b/utils/unlit/unlit.cabal
index a621f04bc7..622a55934d 100644
--- a/utils/unlit/unlit.cabal
+++ b/utils/unlit/unlit.cabal
@@ -13,4 +13,5 @@ build-type: Simple
Executable unlit
Default-Language: Haskell2010
Main-Is: unlit.c
- C-Sources: unlit.c
+ C-Sources: unlit.c, fs.c
+ Includes: fs.h