From 7e78d17901c6eb80db2b87d1c39047f1a8e93a46 Mon Sep 17 00:00:00 2001 From: David Allsopp Date: Wed, 1 Sep 2021 18:35:06 +0100 Subject: Merge pull request #10451 from dra27/no-scripting-for-4.13 Fix lack of iconv on Cygwin and build performance regression on Windows (cherry picked from commit 3a0b1b1e15bc2f33bc471255826d3672b5b27aa9) --- .gitattributes | 2 + .gitignore | 2 + Changes | 16 +++-- Makefile | 3 + Makefile.common | 12 ++++ api_docgen/Makefile.common | 2 +- manual/tests/Makefile | 2 +- runtime/Makefile | 42 +++++-------- runtime/dynlink.c | 2 + runtime/sak.c | 148 +++++++++++++++++++++++++++++++++++++++++++++ runtime/startup_byt.c | 2 + stdlib/StdlibModules | 26 +++----- 12 files changed, 208 insertions(+), 51 deletions(-) create mode 100644 runtime/sak.c diff --git a/.gitattributes b/.gitattributes index 699b865923..956d21360b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -101,6 +101,8 @@ otherlibs/win32unix/readlink.c typo.long-line otherlibs/win32unix/stat.c typo.long-line otherlibs/win32unix/symlink.c typo.long-line +runtime/sak.c typo.non-ascii + stdlib/hashbang typo.white-at-eol typo.missing-lf testsuite/tests/** typo.missing-header typo.long-line=may diff --git a/.gitignore b/.gitignore index 52107195f3..cd5bf11c21 100644 --- a/.gitignore +++ b/.gitignore @@ -198,6 +198,8 @@ _build /runtime/ld.conf /runtime/.gdb_history /runtime/.dep +/runtime/build_config.h +/runtime/sak /runtime/domain_state32.inc /runtime/domain_state64.inc diff --git a/Changes b/Changes index 588749256f..e8de2579d8 100644 --- a/Changes +++ b/Changes @@ -192,11 +192,11 @@ OCaml 4.13.0 (Gabriel Scherer, review by Nicolás Ojeda Bär, Alain Frisch, Xavier Leroy, Daniel Bünzli and Stephen Dolan) -* #10169, #10270, #10301: Use capitalized module names in the Standard Library - prefixing scheme to match Dune, e.g. Stdlib__String instead of Stdlib__string. - This is a breaking change only to code which attempted to use the internal - names before. 
The Standard Library generated by the Dune rules is now - equivalent to the main build (the Dune rules still do not generate a +* #10169, #10270, #10301, #10451: Use capitalized module names in the Standard + Library prefixing scheme to match Dune, e.g. Stdlib__String instead of + Stdlib__string. This is a breaking change only to code which attempted to use + the internal names before. The Standard Library generated by the Dune rules is + now equivalent to the main build (the Dune rules still do not generate a distributable compiler). (David Allsopp and Mark Shinwell, review by Gabriel Scherer) @@ -470,6 +470,12 @@ OCaml 4.13.0 which runtime to use while building the compilers (Sébastien Hinderer, review by David Allsopp) +- #10451: Replace the use of iconv with a C utility to convert $(LIBDIR) to a + C string constant on Windows when building the runtime. Hardens the generation + of the constant on Unix for paths with backslashes, double-quotes and + newlines. + (David Allsopp, review by Florian Angeletti and Sébastien Hinderer) + ### Bug fixes: - #6654, #9774, #10401: make `include` and with `constraints` handle correctly diff --git a/Makefile b/Makefile index 10d73360df..8d8f1b4159 100644 --- a/Makefile +++ b/Makefile @@ -773,6 +773,9 @@ partialclean:: # The runtime system for the bytecode compiler +$(SAK): + $(MAKE) -C runtime sak$(EXE) + .PHONY: runtime runtime: stdlib/libcamlrun.$(A) diff --git a/Makefile.common b/Makefile.common index a460cff01b..b3b418e58b 100644 --- a/Makefile.common +++ b/Makefile.common @@ -160,3 +160,15 @@ OCAMLYACCFLAGS ?= %.ml %.mli: %.mly $(OCAMLYACC) $(OCAMLYACCFLAGS) $< + +SAK = $(ROOTDIR)/runtime/sak$(EXE) + +# stdlib/StdlibModules cannot be include'd unless $(SAK) has been built. These +# two rules add that dependency. They have to be pattern rules since +# Makefile.common is included before default targets. 
+$(ROOTDIR)/%/sak$(EXE): + $(MAKE) -C $(ROOTDIR)/$* sak$(EXE) + +ifneq "$(REQUIRES_CONFIGURATION)" "" +$(ROOTDIR)/%/StdlibModules: $(SAK) ; +endif diff --git a/api_docgen/Makefile.common b/api_docgen/Makefile.common index aab7c1cd32..e360da66d2 100644 --- a/api_docgen/Makefile.common +++ b/api_docgen/Makefile.common @@ -15,8 +15,8 @@ ROOTDIR = .. DOCGEN= $(ROOTDIR)/api_docgen --include $(ROOTDIR)/stdlib/StdlibModules include $(ROOTDIR)/Makefile.common +include $(ROOTDIR)/stdlib/StdlibModules include $(ROOTDIR)/Makefile.best_binaries include $(DOCGEN)/Makefile.docfiles diff --git a/manual/tests/Makefile b/manual/tests/Makefile index ec3cd499c8..003a3a2a0e 100644 --- a/manual/tests/Makefile +++ b/manual/tests/Makefile @@ -1,7 +1,7 @@ ROOTDIR = ../.. -include $(ROOTDIR)/stdlib/StdlibModules include $(ROOTDIR)/api_docgen/Makefile.docfiles include $(ROOTDIR)/Makefile.common +include $(ROOTDIR)/stdlib/StdlibModules include $(ROOTDIR)/Makefile.best_binaries STDLIBFLAGS = -nostdlib -I $(ROOTDIR)/stdlib OCAMLC ?= $(BEST_OCAMLC) $(STDLIBFLAGS) diff --git a/runtime/Makefile b/runtime/Makefile index d099f813d0..1b66e9d8c0 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -37,7 +37,7 @@ NATIVE_C_SOURCES := $(addsuffix .c, \ dynlink clambda_checks afl bigarray \ memprof domain skiplist codefrag) -GENERATED_HEADERS := caml/opnames.h caml/version.h caml/jumptbl.h +GENERATED_HEADERS := caml/opnames.h caml/version.h caml/jumptbl.h build_config.h CONFIG_HEADERS := caml/m.h caml/s.h ifeq "$(TOOLCHAIN)" "msvc" @@ -105,25 +105,6 @@ libasmrunpic_OBJECTS := $(NATIVE_C_SOURCES:.c=.npic.$(O)) \ # General (non target-specific) assembler and compiler flags -# On Windows, OCAML_STDLIB_DIR needs to be defined dynamically - -ifeq "$(UNIX_OR_WIN32)" "win32" -# OCAML_STDLIB_DIR needs to arrive in dynlink.c as a string which both gcc and -# msvc are willing parse without warning. 
This means we can't pass UTF-8 -# directly since, as far as I can tell, cl can cope, but the pre-processor -# can't. So the string needs to be directly translated to L"" form. To do this, -# we take advantage of the fact that Cygwin uses GNU libiconv which includes a -# Java pseudo-encoding which translates any UTF-8 sequences to \uXXXX (and, -# unlike the C99 pseudo-encoding, emits two surrogate values when needed, rather -# than \UXXXXXXXX). The \u is then translated to \x in order to accommodate -# pre-Visual Studio 2013 compilers where \x is a non-standard alias for \u. -OCAML_STDLIB_DIR = $(shell echo $(LIBDIR)| iconv -t JAVA | sed -e 's/\\u/\\x/g') -STDLIB_CPP_FLAG = -DOCAML_STDLIB_DIR='L"$(OCAML_STDLIB_DIR)"' -else # Unix -OCAML_STDLIB_DIR = $(LIBDIR) -STDLIB_CPP_FLAG = -DOCAML_STDLIB_DIR='"$(OCAML_STDLIB_DIR)"' -endif - ifneq "$(CCOMPTYPE)" "msvc" OC_CFLAGS += -g endif @@ -161,7 +142,8 @@ endif # Build, install and clean targets .PHONY: all -all: $(BYTECODE_STATIC_LIBRARIES) $(BYTECODE_SHARED_LIBRARIES) $(PROGRAMS) +all: $(BYTECODE_STATIC_LIBRARIES) $(BYTECODE_SHARED_LIBRARIES) $(PROGRAMS) \ + sak$(EXE) .PHONY: allopt ifneq "$(NATIVE_COMPILER)" "false" @@ -192,8 +174,8 @@ endif .PHONY: clean clean: rm -f *.o *.obj *.a *.lib *.so *.dll ld.conf - rm -f ocamlrun ocamlrund ocamlruni ocamlruns - rm -f ocamlrun.exe ocamlrund.exe ocamlruni.exe ocamlruns.exe + rm -f ocamlrun ocamlrund ocamlruni ocamlruns sak + rm -f ocamlrun.exe ocamlrund.exe ocamlruni.exe ocamlruns.exe sak.exe rm -f primitives primitives.new prims.c $(GENERATED_HEADERS) rm -f domain_state*.inc rm -rf $(DEPDIR) @@ -261,6 +243,16 @@ caml/jumptbl.h : caml/instruct.h caml/version.h : $(ROOTDIR)/tools/make-version-header.sh $(ROOTDIR)/VERSION $^ > $@ +sak$(EXE): sak.$(O) + $(call MKEXE_USING_COMPILER,$@,$^) + +C_LITERAL = $(shell ./sak$(EXE) encode-C-literal '$(1)') + +build_config.h: $(ROOTDIR)/Makefile.config sak$(EXE) + echo '/* This file is generated from $(ROOTDIR)/Makefile.config */' > $@ + echo 
'#define OCAML_STDLIB_DIR $(call C_LITERAL,$(LIBDIR))' >> $@ + echo '#define HOST "$(HOST)"' >> $@ + # Libraries and programs ocamlrun$(EXE): prims.$(O) libcamlrun.$(A) @@ -365,10 +357,6 @@ endif $(foreach object_type, $(object_types), \ $(eval $(call COMPILE_C_FILE,$(object_type),%))) -dynlink.%.$(O): OC_CPPFLAGS += $(STDLIB_CPP_FLAG) - -startup_byt.%.$(O): OC_CPPFLAGS += $(STDLIB_CPP_FLAG) -DHOST='"$(HOST)"' - $(UNIX_OR_WIN32)_non_shared.%.$(O): OC_CPPFLAGS += -DBUILDING_LIBCAMLRUNS $(eval $(call COMPILE_C_FILE,$(UNIX_OR_WIN32)_non_shared.%,$(UNIX_OR_WIN32))) diff --git a/runtime/dynlink.c b/runtime/dynlink.c index 93a7c6b987..b92d5e36eb 100644 --- a/runtime/dynlink.c +++ b/runtime/dynlink.c @@ -36,6 +36,8 @@ #include "caml/prims.h" #include "caml/signals.h" +#include "build_config.h" + #ifndef NATIVE_CODE /* The table of primitives */ diff --git a/runtime/sak.c b/runtime/sak.c new file mode 100644 index 0000000000..76577dedb9 --- /dev/null +++ b/runtime/sak.c @@ -0,0 +1,148 @@ +/**************************************************************************/ +/* */ +/* OCaml */ +/* */ +/* David Allsopp, OCaml Labs, Cambridge. */ +/* */ +/* Copyright 2021 David Allsopp Ltd. */ +/* */ +/* All rights reserved. This file is distributed under the terms of */ +/* the GNU Lesser General Public License version 2.1, with the */ +/* special exception on linking described in the file LICENSE. */ +/* */ +/**************************************************************************/ + +/* Runtime Builder's Swiss Army Knife. This utility performs functions + previously delegated to classic Unix utilities but which ultimately seem to + cause more hassle for maintenance than the initial simplicity suggests. + + This tool is a memorial to the many hours and PRs spent chasing down strange + locale issues, stray CR characters and fighting yet another incompatible + implementation of sed or awk. 
*/ + +/* Borrow the Unicode *_os definitions and T() macro from misc.h */ +#define CAML_INTERNALS +#include "caml/misc.h" + +#include <stdio.h> +#include <string.h> +#include <ctype.h> + +#ifdef _WIN32 +#define strncmp_os wcsncmp +#define toupper_os towupper +#define printf_os wprintf +#else +#define strncmp_os strncmp +#define toupper_os toupper +#define printf_os printf +#endif + +/* Operations + - encode-C-literal. Used for the OCAML_STDLIB_DIR macro in + runtime/build_config.h to ensure the LIBDIR make variable is correctly + represented as a C string literal. + + On Unix, `sak encode-C-literal /usr/local/lib` returns `"/usr/local/lib"` + + On Windows, `sak encode-C-literal "C:\OCaml🐫\lib"` returns + `L"C:\\OCaml\xd83d\xdc2b\\lib"` + - add-stdlib-prefix. Used in stdlib/StdlibModules to convert the list of + basenames given in STDLIB_MODULE_BASENAMES to the actual file basenames + in STDLIB_MODULES. + + For example, `sak add-stdlib-prefix stdlib camlinternalAtomic Sys` returns + ` stdlib camlinternalAtomic stdlib__Sys` + */ + +void usage(void) +{ + printf( + "OCaml Build System Swiss Army Knife\n" + "Usage: sak command\n" + "Commands:\n" + " * encode-C-literal path - encodes path as a C string literal\n" + " * add-stdlib-prefix name1 ... - prefix standard library module names\n" + ); +} + +/* Converts the supplied path (UTF-8 on Unix and UCS-2ish on Windows) to a valid + C string literal. On Windows, this is always a wchar_t* (L"...").
*/ +void encode_C_literal(char_os *path) +{ + char_os c; + +#ifdef _WIN32 + putchar('L'); +#endif + putchar('"'); + + while ((c = *path++) != 0) { + /* Escape \, " and \n */ + if (c == '\\') { + printf("\\\\"); + } else if (c == '"') { + printf("\\\""); + } else if (c == '\n') { + printf("\\n"); +#ifndef _WIN32 + /* On Unix, nothing else needs escaping */ + } else { + putchar(c); +#else + /* On Windows, allow 7-bit printable characters to be displayed literally + and escape everything else (using the older \x notation for increased + compatibility, rather than the newer \U). */ + } else if (c < 0x80 && iswprint(c)) { + putwchar(c); + } else { + printf("\\x%04x", c); +#endif + } + } + + putchar('"'); +} + +/* Print the given array of module names to stdout. "stdlib" and names beginning + "camlinternal" are printed unaltered. All other names are prefixed "stdlib__" + with the original name capitalised (i.e. "foo" prints "stdlib__Foo"). */ +void add_stdlib_prefix(int count, char_os **names) +{ + int i; + char_os *name; + + for (i = 0; i < count; i++) { + name = *names++; + + /* "stdlib" and camlinternal* do not get changed. All other names get + capitalised and prefixed "stdlib__". */ + if (strcmp_os(T("stdlib"), name) == 0 + || strncmp_os(T("camlinternal"), name, 12) == 0) { + printf_os(T(" %s"), name); + } else { + /* name is a null-terminated string, so an empty string simply has the + null-terminator "capitalised".
*/ + *name = toupper_os(*name); + printf_os(T(" stdlib__%s"), name); + } + } +} + +#ifdef _WIN32 +int wmain(int argc, wchar_t **argv) +#else +int main(int argc, char **argv) +#endif +{ + if (argc == 3 && !strcmp_os(argv[1], T("encode-C-literal"))) { + encode_C_literal(argv[2]); + } else if (argc > 1 && !strcmp_os(argv[1], T("add-stdlib-prefix"))) { + add_stdlib_prefix(argc - 2, &argv[2]); + } else { + usage(); + return 1; + } + + return 0; +} diff --git a/runtime/startup_byt.c b/runtime/startup_byt.c index 3998a33b0f..de7549748a 100644 --- a/runtime/startup_byt.c +++ b/runtime/startup_byt.c @@ -61,6 +61,8 @@ #include "caml/startup_aux.h" #include "caml/version.h" +#include "build_config.h" + #ifndef O_BINARY #define O_BINARY 0 #endif diff --git a/stdlib/StdlibModules b/stdlib/StdlibModules index 4d3351c3a3..b4baaeda84 100644 --- a/stdlib/StdlibModules +++ b/stdlib/StdlibModules @@ -15,7 +15,7 @@ #* * #************************************************************************** -# This file must be self-contained. +# This file should be included after Makefile.common # This file lists all standard library modules. It is used by: # 1. stdlib/Makefile when building stdlib.cma @@ -33,7 +33,7 @@ # Basenames of the source files for the standard library (i.e. unprefixed and # with lowercase first letters). These must be listed in dependency order. 
-STDLIB_MODULE_BASENAMES=\ +STDLIB_MODULE_BASENAMES = \ camlinternalFormatBasics camlinternalAtomic \ stdlib pervasives seq option either result bool char uchar \ sys list int bytes string unit marshal obj array float int32 int64 nativeint \ @@ -44,20 +44,12 @@ STDLIB_MODULE_BASENAMES=\ filename complex arrayLabels listLabels bytesLabels stringLabels moreLabels \ stdLabels bigarray -STDLIB_PREFIXED_MODULES=\ +STDLIB_PREFIXED_MODULES = \ $(filter-out stdlib camlinternal%, $(STDLIB_MODULE_BASENAMES)) -define add_stdlib_prefix_first -$(shell echo $1 | cut -c1 | tr '[:lower:]' '[:upper:]') -endef - -# add stdlib__ as prefix to a module except for internal modules -# and the stdlib module itself -define add_stdlib_prefix - $(or $(filter-out $(STDLIB_PREFIXED_MODULES), $1), \ - stdlib__$(call add_stdlib_prefix_first,$1)$(shell echo $1 | cut -c2-)) -endef - -STDLIB_MODULES:=\ - $(foreach module, $(STDLIB_MODULE_BASENAMES), \ - $(call add_stdlib_prefix,$(module))) +# The pattern FOO = $(eval FOO := $$(shell <cmd>))$(FOO) ensures that <cmd> is +# executed either once or not at all, giving us GNU make's equivalent of a +# string lazy_t. +STDLIB_MODULES = \ + $(eval STDLIB_MODULES := $$(shell \ + $(SAK) add-stdlib-prefix $(STDLIB_MODULE_BASENAMES)))$(STDLIB_MODULES) -- cgit v1.2.1