summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Allsopp <david.allsopp@metastack.com>2021-09-01 18:35:06 +0100
committerDavid Allsopp <david.allsopp@metastack.com>2021-09-01 18:35:36 +0100
commit7e78d17901c6eb80db2b87d1c39047f1a8e93a46 (patch)
tree200058e442f2ef146029868f3f6d31d43d732557
parentf1bd190ad4978c33c13457948b293ff6fb85d838 (diff)
downloadocaml-7e78d17901c6eb80db2b87d1c39047f1a8e93a46.tar.gz
Merge pull request #10451 from dra27/no-scripting-for-4.13
Fix lack of iconv on Cygwin and build performance regression on Windows (cherry picked from commit 3a0b1b1e15bc2f33bc471255826d3672b5b27aa9)
-rw-r--r--.gitattributes2
-rw-r--r--.gitignore2
-rw-r--r--Changes16
-rw-r--r--Makefile3
-rw-r--r--Makefile.common12
-rw-r--r--api_docgen/Makefile.common2
-rw-r--r--manual/tests/Makefile2
-rw-r--r--runtime/Makefile42
-rw-r--r--runtime/dynlink.c2
-rw-r--r--runtime/sak.c148
-rw-r--r--runtime/startup_byt.c2
-rw-r--r--stdlib/StdlibModules26
12 files changed, 208 insertions, 51 deletions
diff --git a/.gitattributes b/.gitattributes
index 699b865923..956d21360b 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -101,6 +101,8 @@ otherlibs/win32unix/readlink.c typo.long-line
otherlibs/win32unix/stat.c typo.long-line
otherlibs/win32unix/symlink.c typo.long-line
+runtime/sak.c typo.non-ascii
+
stdlib/hashbang typo.white-at-eol typo.missing-lf
testsuite/tests/** typo.missing-header typo.long-line=may
diff --git a/.gitignore b/.gitignore
index 52107195f3..cd5bf11c21 100644
--- a/.gitignore
+++ b/.gitignore
@@ -198,6 +198,8 @@ _build
/runtime/ld.conf
/runtime/.gdb_history
/runtime/.dep
+/runtime/build_config.h
+/runtime/sak
/runtime/domain_state32.inc
/runtime/domain_state64.inc
diff --git a/Changes b/Changes
index 588749256f..e8de2579d8 100644
--- a/Changes
+++ b/Changes
@@ -192,11 +192,11 @@ OCaml 4.13.0
(Gabriel Scherer, review by Nicolás Ojeda Bär, Alain Frisch, Xavier Leroy,
Daniel Bünzli and Stephen Dolan)
-* #10169, #10270, #10301: Use capitalized module names in the Standard Library
- prefixing scheme to match Dune, e.g. Stdlib__String instead of Stdlib__string.
- This is a breaking change only to code which attempted to use the internal
- names before. The Standard Library generated by the Dune rules is now
- equivalent to the main build (the Dune rules still do not generate a
+* #10169, #10270, #10301, #10451: Use capitalized module names in the Standard
+ Library prefixing scheme to match Dune, e.g. Stdlib__String instead of
+ Stdlib__string. This is a breaking change only to code which attempted to use
+ the internal names before. The Standard Library generated by the Dune rules is
+ now equivalent to the main build (the Dune rules still do not generate a
distributable compiler).
(David Allsopp and Mark Shinwell, review by Gabriel Scherer)
@@ -470,6 +470,12 @@ OCaml 4.13.0
which runtime to use while building the compilers (Sébastien Hinderer,
review by David Allsopp)
+- #10451: Replace the use of iconv with a C utility to convert $(LIBDIR) to a
+ C string constant on Windows when building the runtime. Hardens the generation
+ of the constant on Unix for paths with backslashes, double-quotes and
+ newlines.
+ (David Allsopp, review by Florian Angeletti and Sébastien Hinderer)
+
### Bug fixes:
- #6654, #9774, #10401: make `include` and with `constraints` handle correctly
diff --git a/Makefile b/Makefile
index 10d73360df..8d8f1b4159 100644
--- a/Makefile
+++ b/Makefile
@@ -773,6 +773,9 @@ partialclean::
# The runtime system for the bytecode compiler
+# $(SAK) is built by delegating to the Makefile in the runtime directory, which
+# has the rule for sak$(EXE).
+$(SAK):
+ $(MAKE) -C runtime sak$(EXE)
+
.PHONY: runtime
runtime: stdlib/libcamlrun.$(A)
diff --git a/Makefile.common b/Makefile.common
index a460cff01b..b3b418e58b 100644
--- a/Makefile.common
+++ b/Makefile.common
@@ -160,3 +160,15 @@ OCAMLYACCFLAGS ?=
%.ml %.mli: %.mly
$(OCAMLYACC) $(OCAMLYACCFLAGS) $<
+
+# Path to the build helper compiled from runtime/sak.c
+SAK = $(ROOTDIR)/runtime/sak$(EXE)
+
+# stdlib/StdlibModules cannot be include'd unless $(SAK) has been built. These
+# two rules add that dependency. They have to be pattern rules since
+# Makefile.common is included before default targets.
+# The first rule builds sak by running make in the directory matched by the
+# pattern stem ($*).
+$(ROOTDIR)/%/sak$(EXE):
+ $(MAKE) -C $(ROOTDIR)/$* sak$(EXE)
+
+# The second rule has an empty recipe (the trailing ;): it exists only to give
+# StdlibModules a prerequisite on $(SAK). It is only defined when
+# REQUIRES_CONFIGURATION is non-empty.
+ifneq "$(REQUIRES_CONFIGURATION)" ""
+$(ROOTDIR)/%/StdlibModules: $(SAK) ;
+endif
diff --git a/api_docgen/Makefile.common b/api_docgen/Makefile.common
index aab7c1cd32..e360da66d2 100644
--- a/api_docgen/Makefile.common
+++ b/api_docgen/Makefile.common
@@ -15,8 +15,8 @@
ROOTDIR = ..
DOCGEN= $(ROOTDIR)/api_docgen
--include $(ROOTDIR)/stdlib/StdlibModules
include $(ROOTDIR)/Makefile.common
+include $(ROOTDIR)/stdlib/StdlibModules
include $(ROOTDIR)/Makefile.best_binaries
include $(DOCGEN)/Makefile.docfiles
diff --git a/manual/tests/Makefile b/manual/tests/Makefile
index ec3cd499c8..003a3a2a0e 100644
--- a/manual/tests/Makefile
+++ b/manual/tests/Makefile
@@ -1,7 +1,7 @@
ROOTDIR = ../..
-include $(ROOTDIR)/stdlib/StdlibModules
include $(ROOTDIR)/api_docgen/Makefile.docfiles
include $(ROOTDIR)/Makefile.common
+include $(ROOTDIR)/stdlib/StdlibModules
include $(ROOTDIR)/Makefile.best_binaries
STDLIBFLAGS = -nostdlib -I $(ROOTDIR)/stdlib
OCAMLC ?= $(BEST_OCAMLC) $(STDLIBFLAGS)
diff --git a/runtime/Makefile b/runtime/Makefile
index d099f813d0..1b66e9d8c0 100644
--- a/runtime/Makefile
+++ b/runtime/Makefile
@@ -37,7 +37,7 @@ NATIVE_C_SOURCES := $(addsuffix .c, \
dynlink clambda_checks afl bigarray \
memprof domain skiplist codefrag)
-GENERATED_HEADERS := caml/opnames.h caml/version.h caml/jumptbl.h
+GENERATED_HEADERS := caml/opnames.h caml/version.h caml/jumptbl.h build_config.h
CONFIG_HEADERS := caml/m.h caml/s.h
ifeq "$(TOOLCHAIN)" "msvc"
@@ -105,25 +105,6 @@ libasmrunpic_OBJECTS := $(NATIVE_C_SOURCES:.c=.npic.$(O)) \
# General (non target-specific) assembler and compiler flags
-# On Windows, OCAML_STDLIB_DIR needs to be defined dynamically
-
-ifeq "$(UNIX_OR_WIN32)" "win32"
-# OCAML_STDLIB_DIR needs to arrive in dynlink.c as a string which both gcc and
-# msvc are willing parse without warning. This means we can't pass UTF-8
-# directly since, as far as I can tell, cl can cope, but the pre-processor
-# can't. So the string needs to be directly translated to L"" form. To do this,
-# we take advantage of the fact that Cygwin uses GNU libiconv which includes a
-# Java pseudo-encoding which translates any UTF-8 sequences to \uXXXX (and,
-# unlike the C99 pseudo-encoding, emits two surrogate values when needed, rather
-# than \UXXXXXXXX). The \u is then translated to \x in order to accommodate
-# pre-Visual Studio 2013 compilers where \x is a non-standard alias for \u.
-OCAML_STDLIB_DIR = $(shell echo $(LIBDIR)| iconv -t JAVA | sed -e 's/\\u/\\x/g')
-STDLIB_CPP_FLAG = -DOCAML_STDLIB_DIR='L"$(OCAML_STDLIB_DIR)"'
-else # Unix
-OCAML_STDLIB_DIR = $(LIBDIR)
-STDLIB_CPP_FLAG = -DOCAML_STDLIB_DIR='"$(OCAML_STDLIB_DIR)"'
-endif
-
ifneq "$(CCOMPTYPE)" "msvc"
OC_CFLAGS += -g
endif
@@ -161,7 +142,8 @@ endif
# Build, install and clean targets
.PHONY: all
-all: $(BYTECODE_STATIC_LIBRARIES) $(BYTECODE_SHARED_LIBRARIES) $(PROGRAMS)
+all: $(BYTECODE_STATIC_LIBRARIES) $(BYTECODE_SHARED_LIBRARIES) $(PROGRAMS) \
+ sak$(EXE)
.PHONY: allopt
ifneq "$(NATIVE_COMPILER)" "false"
@@ -192,8 +174,8 @@ endif
.PHONY: clean
clean:
rm -f *.o *.obj *.a *.lib *.so *.dll ld.conf
- rm -f ocamlrun ocamlrund ocamlruni ocamlruns
- rm -f ocamlrun.exe ocamlrund.exe ocamlruni.exe ocamlruns.exe
+ rm -f ocamlrun ocamlrund ocamlruni ocamlruns sak
+ rm -f ocamlrun.exe ocamlrund.exe ocamlruni.exe ocamlruns.exe sak.exe
rm -f primitives primitives.new prims.c $(GENERATED_HEADERS)
rm -f domain_state*.inc
rm -rf $(DEPDIR)
@@ -261,6 +243,16 @@ caml/jumptbl.h : caml/instruct.h
caml/version.h : $(ROOTDIR)/tools/make-version-header.sh $(ROOTDIR)/VERSION
$^ > $@
+# The build helper, linked from its single object file
+sak$(EXE): sak.$(O)
+	$(call MKEXE_USING_COMPILER,$@,$^)
+
+# $(call C_LITERAL,text) expands to text encoded as a C string literal by sak.
+# sak$(EXE) must already have been built when this is expanded.
+C_LITERAL = $(shell ./sak$(EXE) encode-C-literal '$(1)')
+
+# printf '%s\n' is used rather than echo since the behaviour of echo for
+# arguments containing backslashes is implementation-defined (some shells
+# expand \\ and \n), which would corrupt the escaped literal produced by sak.
+# The file is written with a single redirection, so it is never appended to
+# piecemeal.
+build_config.h: $(ROOTDIR)/Makefile.config sak$(EXE)
+	printf '%s\n' \
+	  '/* This file is generated from $(ROOTDIR)/Makefile.config */' \
+	  '#define OCAML_STDLIB_DIR $(call C_LITERAL,$(LIBDIR))' \
+	  '#define HOST "$(HOST)"' > $@
+
+
# Libraries and programs
ocamlrun$(EXE): prims.$(O) libcamlrun.$(A)
@@ -365,10 +357,6 @@ endif
$(foreach object_type, $(object_types), \
$(eval $(call COMPILE_C_FILE,$(object_type),%)))
-dynlink.%.$(O): OC_CPPFLAGS += $(STDLIB_CPP_FLAG)
-
-startup_byt.%.$(O): OC_CPPFLAGS += $(STDLIB_CPP_FLAG) -DHOST='"$(HOST)"'
-
$(UNIX_OR_WIN32)_non_shared.%.$(O): OC_CPPFLAGS += -DBUILDING_LIBCAMLRUNS
$(eval $(call COMPILE_C_FILE,$(UNIX_OR_WIN32)_non_shared.%,$(UNIX_OR_WIN32)))
diff --git a/runtime/dynlink.c b/runtime/dynlink.c
index 93a7c6b987..b92d5e36eb 100644
--- a/runtime/dynlink.c
+++ b/runtime/dynlink.c
@@ -36,6 +36,8 @@
#include "caml/prims.h"
#include "caml/signals.h"
+#include "build_config.h"
+
#ifndef NATIVE_CODE
/* The table of primitives */
diff --git a/runtime/sak.c b/runtime/sak.c
new file mode 100644
index 0000000000..76577dedb9
--- /dev/null
+++ b/runtime/sak.c
@@ -0,0 +1,148 @@
+/**************************************************************************/
+/* */
+/* OCaml */
+/* */
+/* David Allsopp, OCaml Labs, Cambridge. */
+/* */
+/* Copyright 2021 David Allsopp Ltd. */
+/* */
+/* All rights reserved. This file is distributed under the terms of */
+/* the GNU Lesser General Public License version 2.1, with the */
+/* special exception on linking described in the file LICENSE. */
+/* */
+/**************************************************************************/
+
+/* Runtime Builder's Swiss Army Knife. This utility performs functions
+ previously delegated to classic Unix utilities but which ultimately seem to
+ cause more hassle for maintenance than the initial simplicity suggests.
+
+ This tool is a memorial to the many hours and PRs spent chasing down strange
+ locale issues, stray CR characters and fighting yet another incompatible
+ implementation of sed or awk. */
+
+/* Borrow the Unicode *_os definitions and T() macro from misc.h */
+#define CAML_INTERNALS
+#include "caml/misc.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+/* Further *_os aliases used by this file: the wide-character functions on
+   Windows and the narrow-character ones elsewhere. */
+#ifdef _WIN32
+#define strncmp_os wcsncmp
+#define toupper_os towupper
+#define printf_os wprintf
+#else
+#define strncmp_os strncmp
+/* toupper has undefined behaviour for negative arguments other than EOF, which
+   a plain char holding a non-ASCII byte may be, so convert via unsigned char */
+#define toupper_os(c) toupper((unsigned char)(c))
+#define printf_os printf
+#endif
+
+/* Operations
+ - encode-C-literal. Used for the OCAML_STDLIB_DIR macro in
+ runtime/build_config.h to ensure the LIBDIR make variable is correctly
+ represented as a C string literal.
+
+ On Unix, `sak encode-C-literal /usr/local/lib` returns `"/usr/local/lib"`
+
+ On Windows, `sak encode-C-literal "C:\OCaml🐫\lib"` returns
+ `L"C:\\OCaml\xd83d\xdc2b\\lib"`
+ - add-stdlib-prefix. Used in stdlib/StdlibModules to convert the list of
+ basenames given in STDLIB_MODULE_BASENAMES to the actual file basenames
+ in STDLIB_MODULES.
+
+ For example, `sak add-stdlib-prefix stdlib camlinternalAtomic Sys` returns
+ ` stdlib camlinternalAtomic stdlib__Sys`
+ */
+
+/* Displays the list of commands. This is only called for an invalid command
+   line, so the text is written to stderr: sak is run from $(shell ...), which
+   captures stdout, and the message must not be mistaken for a result. */
+void usage(void)
+{
+  fprintf(stderr,
+    "OCaml Build System Swiss Army Knife\n"
+    "Usage: sak command\n"
+    "Commands:\n"
+    " * encode-C-literal path - encodes path as a C string literal\n"
+    " * add-stdlib-prefix name1 ... - prefix standard library module names\n"
+  );
+}
+
+/* Converts the supplied path (UTF-8 on Unix and UCS-2ish on Windows) to a valid
+   C string literal and writes it to stdout, with no trailing newline. On
+   Windows, this is always a wchar_t* (L"...").
+
+   \, " and newline are backslash-escaped on both platforms. On Windows, any
+   character which is not 7-bit printable is written as \xNNNN. A \x escape
+   extends over every hexadecimal digit which follows it, so a hexadecimal
+   digit coming directly after an escaped character is escaped as well: U+00F6
+   followed by "c" is written \x00f6\x0063, because \x00f6c would be read as
+   the single character U+0F6C. */
+void encode_C_literal(char_os *path)
+{
+  char_os c;
+#ifdef _WIN32
+  /* Whether the previous character was written as a \x escape */
+  int after_hex_escape = 0;
+
+  putchar('L');
+#endif
+  putchar('"');
+
+  while ((c = *path++) != 0) {
+#ifdef _WIN32
+    /* Whether this character is written as a \x escape */
+    int hex_escaped = 0;
+#endif
+
+    /* Escape \, " and \n */
+    if (c == '\\') {
+      printf("\\\\");
+    } else if (c == '"') {
+      printf("\\\"");
+    } else if (c == '\n') {
+      printf("\\n");
+#ifndef _WIN32
+    /* On Unix, nothing else needs escaping */
+    } else {
+      putchar(c);
+#else
+    /* On Windows, allow 7-bit printable characters to be displayed literally
+       and escape everything else (using the older \x notation for increased
+       compatibility, rather than the newer \U). A hexadecimal digit is not
+       displayed literally straight after a \x escape, since it would be
+       parsed as a continuation of that escape. */
+    } else if (c < 0x80 && iswprint(c)
+               && !(after_hex_escape && iswxdigit(c))) {
+      putwchar(c);
+    } else {
+      printf("\\x%04x", (unsigned int)c);
+      hex_escaped = 1;
+#endif
+    }
+
+#ifdef _WIN32
+    after_hex_escape = hex_escaped;
+#endif
+  }
+
+  putchar('"');
+}
+
+/* Writes the count module names in names to stdout, each one preceded by a
+   single space. The name "stdlib" and any name starting "camlinternal" are
+   written as given; every other name has its first character capitalised (in
+   place) and is written with "stdlib__" in front of it, so "foo" is written
+   as "stdlib__Foo". */
+void add_stdlib_prefix(int count, char_os **names)
+{
+  int idx;
+
+  for (idx = 0; idx < count; idx++) {
+    char_os *module = names[idx];
+    int needs_prefix = strcmp_os(T("stdlib"), module) != 0
+                       && strncmp_os(T("camlinternal"), module, 12) != 0;
+
+    if (needs_prefix) {
+      /* module is null-terminated, so for an empty string it is simply the
+         null terminator which gets "capitalised". */
+      module[0] = toupper_os(module[0]);
+      printf_os(T(" stdlib__%s"), module);
+    } else {
+      printf_os(T(" %s"), module);
+    }
+  }
+}
+
+/* Entry point: dispatches on the command given in argv[1]. Returns 0 when a
+   command was run, or displays the usage message and returns 1 when the
+   command line is not recognised. encode-C-literal takes exactly one argument;
+   add-stdlib-prefix takes any number of arguments, including none. */
+#ifdef _WIN32
+int wmain(int argc, wchar_t **argv)
+#else
+int main(int argc, char **argv)
+#endif
+{
+  if (argc > 1) {
+    if (argc == 3 && strcmp_os(argv[1], T("encode-C-literal")) == 0) {
+      encode_C_literal(argv[2]);
+      return 0;
+    }
+
+    if (strcmp_os(argv[1], T("add-stdlib-prefix")) == 0) {
+      add_stdlib_prefix(argc - 2, &argv[2]);
+      return 0;
+    }
+  }
+
+  usage();
+  return 1;
+}
diff --git a/runtime/startup_byt.c b/runtime/startup_byt.c
index 3998a33b0f..de7549748a 100644
--- a/runtime/startup_byt.c
+++ b/runtime/startup_byt.c
@@ -61,6 +61,8 @@
#include "caml/startup_aux.h"
#include "caml/version.h"
+#include "build_config.h"
+
#ifndef O_BINARY
#define O_BINARY 0
#endif
diff --git a/stdlib/StdlibModules b/stdlib/StdlibModules
index 4d3351c3a3..b4baaeda84 100644
--- a/stdlib/StdlibModules
+++ b/stdlib/StdlibModules
@@ -15,7 +15,7 @@
#* *
#**************************************************************************
-# This file must be self-contained.
+# This file should be included after Makefile.common
# This file lists all standard library modules. It is used by:
# 1. stdlib/Makefile when building stdlib.cma
@@ -33,7 +33,7 @@
# Basenames of the source files for the standard library (i.e. unprefixed and
# with lowercase first letters). These must be listed in dependency order.
-STDLIB_MODULE_BASENAMES=\
+STDLIB_MODULE_BASENAMES = \
camlinternalFormatBasics camlinternalAtomic \
stdlib pervasives seq option either result bool char uchar \
sys list int bytes string unit marshal obj array float int32 int64 nativeint \
@@ -44,20 +44,12 @@ STDLIB_MODULE_BASENAMES=\
filename complex arrayLabels listLabels bytesLabels stringLabels moreLabels \
stdLabels bigarray
-STDLIB_PREFIXED_MODULES=\
+STDLIB_PREFIXED_MODULES = \
$(filter-out stdlib camlinternal%, $(STDLIB_MODULE_BASENAMES))
-define add_stdlib_prefix_first
-$(shell echo $1 | cut -c1 | tr '[:lower:]' '[:upper:]')
-endef
-
-# add stdlib__ as prefix to a module except for internal modules
-# and the stdlib module itself
-define add_stdlib_prefix
- $(or $(filter-out $(STDLIB_PREFIXED_MODULES), $1), \
- stdlib__$(call add_stdlib_prefix_first,$1)$(shell echo $1 | cut -c2-))
-endef
-
-STDLIB_MODULES:=\
- $(foreach module, $(STDLIB_MODULE_BASENAMES), \
- $(call add_stdlib_prefix,$(module)))
+# The pattern FOO = $(eval FOO := $$(shell <cmd>))$(FOO) ensures that <cmd> is
+# executed either once or not at all, giving us GNU make's equivalent of a
+# string lazy_t. The first time FOO is expanded, the $(eval ...) redefines FOO
+# as a simply-expanded variable holding the output of <cmd> (the eval itself
+# expands to nothing) and the trailing $(FOO) then expands to that new value.
+# Later expansions see only the simply-expanded value.
+STDLIB_MODULES = \
+ $(eval STDLIB_MODULES := $$(shell \
+ $(SAK) add-stdlib-prefix $(STDLIB_MODULE_BASENAMES)))$(STDLIB_MODULES)