diff options
author | Tamar Christina <tamar@zhox.com> | 2018-03-29 14:22:09 +0100 |
---|---|---|
committer | Tamar Christina <tamar@zhox.com> | 2018-03-31 10:11:53 +0100 |
commit | 4de585a5c1ac3edc2914cebcac1753b514051a89 (patch) | |
tree | 09bfb4251808007bb4ad79c6f10f3e4fbe3e9312 /utils | |
parent | afb686a88901d7d0c93627806d7e4d0444aa17e8 (diff) | |
download | haskell-4de585a5c1ac3edc2914cebcac1753b514051a89.tar.gz |
Remove MAX_PATH restrictions from RTS, I/O manager and various utilities
Summary:
This shims out fopen and sopen so that they use modern APIs under the hood
along with namespaced paths.
This lifts the MAX_PATH restrictions from Haskell programs and raises the path
length limit to roughly 32k characters.
There are only some slight caveats that have been documented.
Some utilities, such as lndir, have not been upgraded. Since all of these are
separate cabal packages, I have been forced to copy the source into several places,
which is less than ideal, but it is the only way to keep sdist working.
Test Plan: ./validate
Reviewers: hvr, bgamari, erikd, simonmar
Reviewed By: bgamari
Subscribers: rwbarton, thomie, carter
GHC Trac Issues: #10822
Differential Revision: https://phabricator.haskell.org/D4416
Diffstat (limited to 'utils')
-rw-r--r-- | utils/fs/README | 4 | ||||
-rw-r--r-- | utils/fs/fs.c | 293 | ||||
-rw-r--r-- | utils/fs/fs.h | 36 | ||||
-rw-r--r-- | utils/lndir/lndir.c | 17 | ||||
-rw-r--r-- | utils/unlit/ghc.mk | 2 | ||||
-rw-r--r-- | utils/unlit/unlit.c | 19 | ||||
-rw-r--r-- | utils/unlit/unlit.cabal | 3 |
7 files changed, 355 insertions, 19 deletions
diff --git a/utils/fs/README b/utils/fs/README new file mode 100644 index 0000000000..5011939a38 --- /dev/null +++ b/utils/fs/README @@ -0,0 +1,4 @@ +This "fs" library, used by various ghc utilities is used to share some common +I/O filesystem functions with different packages. + +This file is copied across the build-system by configure. diff --git a/utils/fs/fs.c b/utils/fs/fs.c new file mode 100644 index 0000000000..24bf3a3632 --- /dev/null +++ b/utils/fs/fs.c @@ -0,0 +1,293 @@ +/* ----------------------------------------------------------------------------- + * + * (c) Tamar Christina 2018 + * + * Windows I/O routines for file opening. + * + * NOTE: Only modify this file in utils/fs/ and rerun configure. Do not edit + * this file in any other directory as it will be overwritten. + * + * ---------------------------------------------------------------------------*/ +#include "fs.h" +#include <stdio.h> + +#if defined(_WIN32) + +#include <stdbool.h> +#include <stdlib.h> +#include <stdint.h> + +#include <windows.h> +#include <io.h> +#include <fcntl.h> +#include <wchar.h> +#include <sys\stat.h> +#include <sys\types.h> +#include <share.h> + +/* This function converts Windows paths between namespaces. More specifically + It converts an explorer style path into a NT or Win32 namespace. + This has several caveats but they are caviats that are native to Windows and + not POSIX. See + https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247.aspx. + Anything else such as raw device paths we leave untouched. The main benefit + of doing any of this is that we can break the MAX_PATH restriction and also + access raw handles that we couldn't before. 
*/ +static wchar_t* __hs_create_device_name (const wchar_t* filename) { + const wchar_t* win32_dev_namespace = L"\\\\.\\"; + const wchar_t* win32_file_namespace = L"\\\\?\\"; + const wchar_t* nt_device_namespace = L"\\Device\\"; + const wchar_t* unc_prefix = L"UNC\\"; + const wchar_t* network_share = L"\\\\"; + + wchar_t* result = _wcsdup (filename); + wchar_t ns[10] = {0}; + + /* If the file is already in a native namespace don't change it. */ + if ( wcsncmp (win32_dev_namespace , filename, 4) == 0 + || wcsncmp (win32_file_namespace, filename, 4) == 0 + || wcsncmp (nt_device_namespace , filename, 8) == 0) + return result; + + /* Since we're using the lower level APIs we must normalize slashes now. The + Win32 API layer will no longer convert '/' into '\\' for us. */ + for (size_t i = 0; i < wcslen (result); i++) + { + if (result[i] == L'/') + result[i] = L'\\'; + } + + /* Now resolve any . and .. in the path or subsequent API calls may fail since + Win32 will no longer resolve them. */ + DWORD nResult = GetFullPathNameW (result, 0, NULL, NULL) + 1; + wchar_t *temp = _wcsdup (result); + result = malloc (nResult * sizeof (wchar_t)); + if (GetFullPathNameW (temp, nResult, result, NULL) == 0) + { + goto cleanup; + } + + free (temp); + + if (wcsncmp (network_share, result, 2) == 0) + { + if (swprintf (ns, 10, L"%ls%ls", win32_file_namespace, unc_prefix) <= 0) + { + goto cleanup; + } + } + else if (swprintf (ns, 10, L"%ls", win32_file_namespace) <= 0) + { + goto cleanup; + } + + /* Create new string. */ + int bLen = wcslen (result) + wcslen (ns) + 1; + temp = _wcsdup (result); + result = malloc (bLen * sizeof (wchar_t)); + if (swprintf (result, bLen, L"%ls%ls", ns, temp) <= 0) + { + goto cleanup; + } + + free (temp); + + return result; + +cleanup: + free (temp); + free (result); + return NULL; +} + +#define HAS_FLAG(a,b) ((a & b) == b) + +int FS(swopen) (const wchar_t* filename, int oflag, int shflag, int pmode) +{ + /* Construct access mode. 
*/ + DWORD dwDesiredAccess = 0; + if (HAS_FLAG (oflag, _O_RDONLY)) + dwDesiredAccess |= GENERIC_READ | FILE_READ_DATA | FILE_READ_ATTRIBUTES | + FILE_WRITE_ATTRIBUTES;; + if (HAS_FLAG (oflag, _O_RDWR)) + dwDesiredAccess |= GENERIC_WRITE | GENERIC_READ | FILE_READ_DATA | + FILE_WRITE_DATA | FILE_READ_ATTRIBUTES | + FILE_WRITE_ATTRIBUTES; + if (HAS_FLAG (oflag, _O_WRONLY)) + dwDesiredAccess|= GENERIC_WRITE | FILE_WRITE_DATA | + FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES; + + /* Construct shared mode. */ + DWORD dwShareMode = FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE; + if (HAS_FLAG (shflag, _SH_DENYRW)) + dwShareMode &= ~(FILE_SHARE_READ | FILE_SHARE_WRITE); + if (HAS_FLAG (shflag, _SH_DENYWR)) + dwShareMode &= ~FILE_SHARE_WRITE; + if (HAS_FLAG (shflag, _SH_DENYRD)) + dwShareMode &= ~FILE_SHARE_READ; + if (HAS_FLAG (pmode, _S_IWRITE)) + dwShareMode |= FILE_SHARE_READ | FILE_SHARE_WRITE; + if (HAS_FLAG (pmode, _S_IREAD)) + dwShareMode |= FILE_SHARE_READ; + + /* Override access mode with pmode if creating file. */ + if (HAS_FLAG (oflag, _O_CREAT)) + { + if (HAS_FLAG (pmode, _S_IWRITE)) + dwDesiredAccess |= FILE_GENERIC_WRITE; + if (HAS_FLAG (pmode, _S_IREAD)) + dwDesiredAccess |= FILE_GENERIC_READ; + } + + /* Create file disposition. */ + DWORD dwCreationDisposition = OPEN_EXISTING; + if (HAS_FLAG (oflag, _O_CREAT)) + dwCreationDisposition = OPEN_ALWAYS; + if (HAS_FLAG (oflag, (_O_CREAT | _O_EXCL))) + dwCreationDisposition = CREATE_NEW; + if (HAS_FLAG (oflag, _O_TRUNC) && !HAS_FLAG (oflag, _O_CREAT)) + dwCreationDisposition = TRUNCATE_EXISTING; + + /* Set file access attributes. */ + DWORD dwFlagsAndAttributes = FILE_ATTRIBUTE_NORMAL; + if (HAS_FLAG (oflag, _O_RDONLY)) + dwFlagsAndAttributes |= 0; /* No special attribute. 
*/ + if (HAS_FLAG (oflag, (_O_CREAT | _O_TEMPORARY))) + dwFlagsAndAttributes |= FILE_FLAG_DELETE_ON_CLOSE; + if (HAS_FLAG (oflag, (_O_CREAT | _O_SHORT_LIVED))) + dwFlagsAndAttributes |= FILE_ATTRIBUTE_TEMPORARY; + if (HAS_FLAG (oflag, _O_RANDOM)) + dwFlagsAndAttributes |= FILE_FLAG_RANDOM_ACCESS; + if (HAS_FLAG (oflag, _O_SEQUENTIAL)) + dwFlagsAndAttributes |= FILE_FLAG_SEQUENTIAL_SCAN; + /* Flag is only valid on it's own. */ + if (dwFlagsAndAttributes != FILE_ATTRIBUTE_NORMAL) + dwFlagsAndAttributes &= ~FILE_ATTRIBUTE_NORMAL; + + /* Set security attributes. */ + SECURITY_ATTRIBUTES securityAttributes; + ZeroMemory (&securityAttributes, sizeof(SECURITY_ATTRIBUTES)); + securityAttributes.bInheritHandle = !(oflag & _O_NOINHERIT); + securityAttributes.lpSecurityDescriptor = NULL; + securityAttributes.nLength = sizeof(SECURITY_ATTRIBUTES); + + wchar_t* _filename = __hs_create_device_name (filename); + if (!_filename) + return -1; + + HANDLE hResult + = CreateFileW (_filename, dwDesiredAccess, dwShareMode, &securityAttributes, + dwCreationDisposition, dwFlagsAndAttributes, NULL); + free (_filename); + if (INVALID_HANDLE_VALUE == hResult) + return -1; + + /* Now we have a Windows handle, we have to convert it to an FD and apply + the remaining flags. */ + const int flag_mask = _O_APPEND | _O_RDONLY | _O_TEXT | _O_WTEXT; + int fd = _open_osfhandle ((intptr_t)hResult, oflag & flag_mask); + if (-1 == fd) + return -1; + + /* Finally we can change the mode to the requested one. 
*/ + const int mode_mask = _O_TEXT | _O_BINARY | _O_U16TEXT | _O_U8TEXT | _O_WTEXT; + if ((oflag & mode_mask) && (-1 == _setmode (fd, oflag & mode_mask))) + return -1; + + return fd; +} + +FILE *FS(fwopen) (const wchar_t* filename, const wchar_t* mode) +{ + int shflag = 0; + int pmode = 0; + int oflag = 0; + + int len = wcslen (mode); + int i; + #define IS_EXT(X) ((i < (len - 1)) && mode[i] == X) + + for (i = 0; i < len; i++) + { + switch (mode[i]) + { + case L'a': + if (IS_EXT (L'+')) + oflag |= _O_RDWR | _O_CREAT | _O_APPEND; + else + oflag |= _O_WRONLY | _O_CREAT | _O_APPEND; + break; + case L'r': + if (IS_EXT (L'+')) + oflag |= _O_RDWR; + else + oflag |= _O_RDONLY; + break; + case L'w': + if (IS_EXT (L'+')) + oflag |= _O_RDWR | _O_CREAT | _O_TRUNC; + else + oflag |= _O_WRONLY | _O_CREAT | _O_TRUNC; + break; + case L'b': + oflag |= _O_BINARY; + break; + case L't': + oflag |= _O_TEXT; + break; + case L'c': + case L'n': + oflag |= 0; + break; + case L'S': + oflag |= _O_SEQUENTIAL; + break; + case L'R': + oflag |= _O_RANDOM; + break; + case L'T': + oflag |= _O_SHORT_LIVED; + break; + case L'D': + oflag |= _O_TEMPORARY; + break; + default: + if (wcsncmp (mode, L"ccs=UNICODE", 11) == 0) + oflag |= _O_WTEXT; + else if (wcsncmp (mode, L"ccs=UTF-8", 9) == 0) + oflag |= _O_U8TEXT; + else if (wcsncmp (mode, L"ccs=UTF-16LE", 12) == 0) + oflag |= _O_U16TEXT; + else continue; + } + } + #undef IS_EXT + + int fd = FS(swopen) (filename, oflag, shflag, pmode); + FILE* file = _wfdopen (fd, mode); + return file; +} + +FILE *FS(fopen) (const char* filename, const char* mode) +{ + size_t len = mbstowcs (NULL, filename, 0); + wchar_t *w_filename = malloc (sizeof (wchar_t) * (len + 1)); + mbstowcs (w_filename, filename, len); + w_filename[len] = L'\0'; + + len = mbstowcs (NULL, mode, 0); + wchar_t *w_mode = malloc (sizeof (wchar_t) * (len + 1)); + mbstowcs (w_mode, mode, len); + w_mode[len] = L'\0'; + + FILE *result = FS(fwopen) (w_filename, w_mode); + free (w_filename); + free 
(w_mode); + return result; +} +#else +FILE *FS(fopen) (const char* filename, const char* mode) +{ + return fopen (filename, mode); +} +#endif diff --git a/utils/fs/fs.h b/utils/fs/fs.h new file mode 100644 index 0000000000..ab2eded2a1 --- /dev/null +++ b/utils/fs/fs.h @@ -0,0 +1,36 @@ +/* ----------------------------------------------------------------------------- + * + * (c) Tamar Christina 2018 + * + * Windows I/O routines for file opening. + * + * NOTE: Only modify this file in utils/fs/ and rerun configure. Do not edit + * this file in any other directory as it will be overwritten. + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +#include <stdio.h> + +#if !defined(FS_NAMESPACE) +#define FS_NAMESPACE hs +#endif + +/* Play some dirty tricks to get CPP to expand correctly. */ +#define FS_FULL(ns, name) __##ns##_##name +#define prefix FS_NAMESPACE +#define FS_L(p, n) FS_FULL(p, n) +#define FS(name) FS_L(prefix, name) + +#if defined(_WIN32) +#include <wchar.h> + +int FS(swopen) (const wchar_t* filename, int oflag, + int shflag, int pmode); +FILE *FS(fwopen) (const wchar_t* filename, const wchar_t* mode); +FILE *FS(fopen) (const char* filename, const char* mode); +#else + +FILE *FS(fopen) (const char* filename, const char* mode); +#endif diff --git a/utils/lndir/lndir.c b/utils/lndir/lndir.c index 87f2824166..8ea5ab2ab4 100644 --- a/utils/lndir/lndir.c +++ b/utils/lndir/lndir.c @@ -2,7 +2,7 @@ /* Create shadow link tree (after X11R4 script of the same name) Mark Reinhold (mbr@lcs.mit.edu)/3 January 1990 */ -/* +/* Copyright (c) 1990, X Consortium Permission is hereby granted, free of charge, to any person obtaining a copy @@ -46,6 +46,7 @@ in this Software without prior written authorization from the X Consortium. 
#define NeedVarargsPrototypes 1 #include "lndir-Xos.h" +#include "fs.h" #include <stdlib.h> #include <stdio.h> #include <sys/stat.h> @@ -182,11 +183,11 @@ int copyfile(const char *oldpath, const char *newpath) { return symlink(oldpath, newpath); } else { #endif - f_old = fopen(oldpath, "rb"); + f_old = __hs_fopen(oldpath, "rb"); if (f_old == NULL) { return -1; } - f_new = fopen(newpath, "wbx"); + f_new = __hs_fopen(newpath, "wbx"); if (f_new == NULL) { e = errno; fclose(f_old); @@ -272,7 +273,7 @@ int rel; /* if true, prepend "../" to fn before using */ else buf[0] = '\0'; strcat (buf, fn); - + if (!(df = opendir (buf))) { msg ("%s: Cannot opendir", buf); return 1; @@ -305,7 +306,7 @@ int rel; /* if true, prepend "../" to fn before using */ #if defined(S_ISDIR) if(S_ISDIR(sb.st_mode)) #else - if (sb.st_mode & S_IFDIR) + if (sb.st_mode & S_IFDIR) #endif { /* directory */ @@ -397,7 +398,7 @@ int rel; /* if true, prepend "../" to fn before using */ mperror (dp->d_name); } } - + closedir (df); return 0; } @@ -410,7 +411,7 @@ char **av; char* tn; struct stat fs, ts; #if defined(__CYGWIN32__) - /* + /* The lndir code assumes unix-style paths to work. cygwin lets you get away with using dos'ish paths (e.g., "f:/oo") in most contexts. Using them with 'lndir' will seriously @@ -457,7 +458,7 @@ char **av; if (stat (tn, &ts) < 0) { if (force && (tn[0] != '.' 
|| tn[1] != '\0') ) { mymkdir(tn, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH ); - } + } else { quiterr (1, tn); #if defined(S_ISDIR) diff --git a/utils/unlit/ghc.mk b/utils/unlit/ghc.mk index 8911f4e856..0560aa57b8 100644 --- a/utils/unlit/ghc.mk +++ b/utils/unlit/ghc.mk @@ -11,7 +11,7 @@ # ----------------------------------------------------------------------------- # built by ghc-stage0 -utils/unlit_dist_C_SRCS = unlit.c +utils/unlit_dist_C_SRCS = unlit.c fs.c utils/unlit_dist_PROGNAME = unlit utils/unlit_dist_TOPDIR = YES utils/unlit_dist_INSTALL_INPLACE = YES diff --git a/utils/unlit/unlit.c b/utils/unlit/unlit.c index 4eb91d71be..97f853b268 100644 --- a/utils/unlit/unlit.c +++ b/utils/unlit/unlit.c @@ -7,7 +7,7 @@ * column on each line. It is hoped that this style of programming will * encourage the writing of accurate and clearly documented programs * in which the writer may include motivating arguments, examples - * and explanations. + * and explanations. * * Unlit is a filter that can be used to strip all of the comment lines * out of a literate script file. The command format for unlit is: @@ -40,6 +40,7 @@ * And \begin{pseudocode} ... \end{pseudocode}. 
-- LA */ +#include "fs.h" #include <string.h> #include <stdio.h> #include <stdlib.h> @@ -115,7 +116,7 @@ static void myputc(char c, FILE *ostream) { if (putc(c,ostream) == EOF) { writeerror(); - } + } } #define TABPOS 8 @@ -179,7 +180,7 @@ static line readline(FILE *istream, FILE *ostream) { if (c==EOF) return ENDFILE; - + if ( c == '#' ) { if ( ignore_shebang ) { c1 = egetc(istream); @@ -335,10 +336,10 @@ int main(int argc,char **argv) else if (strcmp(*argv,"-h")==0) { if (argc > 1) { argc--; argv++; - if (prefix_str) + if (prefix_str) free(prefix_str); prefix_str = (char*)malloc(sizeof(char)*(1+strlen(*argv))); - if (prefix_str) + if (prefix_str) strcpy(prefix_str, *argv); } } else if (strcmp(*argv,"-#")==0) @@ -362,16 +363,16 @@ int main(int argc,char **argv) file = "stdin"; } else - if ((istream=fopen(argv[0], "r")) == NULL) { + if ((istream=__hs_fopen(argv[0], "r")) == NULL) { fprintf(stderr, CANNOTOPEN, argv[0]); exit(1); } ofilename=argv[1]; - if (strcmp(argv[1], "-")==0) - ostream = stdout; + if (strcmp(argv[1], "-")==0) + ostream = stdout; else - if ((ostream=fopen(argv[1], "w")) == NULL) { + if ((ostream=__hs_fopen(argv[1], "w")) == NULL) { fprintf(stderr, CANNOTOPEN, argv[1]); exit(1); } diff --git a/utils/unlit/unlit.cabal b/utils/unlit/unlit.cabal index a621f04bc7..622a55934d 100644 --- a/utils/unlit/unlit.cabal +++ b/utils/unlit/unlit.cabal @@ -13,4 +13,5 @@ build-type: Simple Executable unlit Default-Language: Haskell2010 Main-Is: unlit.c - C-Sources: unlit.c + C-Sources: unlit.c, fs.c + Includes: fs.h |