#************************************************************************** #* * #* OCaml * #* * #* Gabriel Scherer, projet Parsifal, INRIA Saclay * #* * #* Copyright 2018 Institut National de Recherche en Informatique et * #* en Automatique. * #* * #* All rights reserved. This file is distributed under the terms of * #* the GNU Lesser General Public License version 2.1, with the * #* special exception on linking described in the file LICENSE. * #* * #************************************************************************** # The rules in this Makefile use Menhir to rebuild the OCaml compiler # parser. They are included in the main Makefile, so should be invoked # directly, for example 'make promote-menhir'. They must be called # after any modification to parsing/parser.mly, for the modification # to affect the parser linked in the produced compiler: # # - promote-menhir builds the parser from parser.mly and stores it in # the boot/ directory, so that future builds of the compiler use the # updated result. Use it to make permanent changes to the compiler # parser. # # - demote-menhir undoes the effect of promote-menhir. The files in # the boot/ directory that are affected by promote-menhir and are # under version control are restored to their normal state (HEAD). # # - test-menhir builds the parser from parser.mly without storing it # in the boot/ directory, and only checks that the generated parser # builds correctly. Use it to quickly check if a parser.mly change # breaks the build. If you want to test a compiler produced with # the new parser, you must use promote-menhir instead. # (Using this rule requires a partial compiler build as obtained # by 'make core' or 'make world'.) # # - clean-menhir removes the files generated by Menhir from parsing/, # keeping only the reference sources for the grammar. 
#
# - depend-menhir updates the dependency information for the
#   Menhir-generated parser, which is versioned in the OCaml repository
#   like all other .depend files. It should be used when the dependencies
#   (of the OCaml code in the grammar semantic actions) change.

# The Menhir executable; may be overridden from the command line or the
# environment.
MENHIR ?= menhir

## Unused tokens

# The tokens COMMENT, DOCSTRING and EOL are produced by special lexer
# modes used by other consumers than the parser.

# GREATERRBRACKET ">]" was added to the parser by symmetry with "[<"
# (which is used in polymorphic variants), but is not currently used by
# the grammar.

unused_tokens := COMMENT DOCSTRING EOL GREATERRBRACKET

## Menhir's flags.

# The basic flags influence the analysis of the grammar and the construction
# of the automaton. The complete set of flags includes extra flags that
# influence type inference and code production.

MENHIRBASICFLAGS := \
  --lalr \
  --explain \
  --dump \
  --require-aliases \
  --strict \
  -lg 1 \
  -la 1 \
  $(addprefix --unused-token ,$(unused_tokens))

# Note: this is a simply-expanded variable, so CAMLC, OC_COMMON_COMPFLAGS
# and INCLUDES are read at the point where this file is included.
MENHIRFLAGS := \
  $(MENHIRBASICFLAGS) \
  --infer \
  --ocamlc "$(CAMLC) $(OC_COMMON_COMPFLAGS) $(INCLUDES)" \
  --fixed-exception \
  --table \
  --strategy simplified

## promote-menhir

.PHONY: promote-menhir
promote-menhir: parsing/parser.mly
	@ $(MAKE) import-menhirLib
	$(MENHIR) $(MENHIRFLAGS) parsing/parser.mly
# The generated parser.ml may contain lexer directives containing
# the absolute path to Menhir's standard library on the promoter's machine.
# This is benign but will generate pointless churn if another developer
# rebuilds the same grammar (from the same Menhir version).
#
# The generated file in parsing/ is removed only once sed has succeeded,
# and any failure aborts the loop, so that a failed promotion never
# discards the generated parser nor silently leaves a truncated file
# in boot/menhir.
	@ for f in $(addprefix parser.,ml mli) ; do \
	  sed \
	    's,^#\(.*\)"[^"]*/menhir/standard.mly",#\1"menhir/standard.mly",g' \
	    parsing/$$f \
	    > boot/menhir/$$f \
	  && rm parsing/$$f \
	  || exit 1 ; \
	done

# The import-menhirLib invocation in promote-menhir ensures that each
# update of the boot/ parser is paired with an update of the imported
# menhirLib; otherwise it would be easy to generate a parser and keep
# an incompatible version of menhirLib, which would fail at
# compile-time.

# Menhir is queried only once for the location of menhirLib; if that
# query fails, the rule fails before attempting any copy.
.PHONY: import-menhirLib
import-menhirLib:
	@ mkdir -p boot/menhir
	@ lib=`$(MENHIR) --suggest-menhirLib` && \
	  cp "$$lib/menhirLib.ml" "$$lib/menhirLib.mli" boot/menhir

## demote-menhir

# The files under boot/menhir/ that demote-menhir restores from HEAD.
DEMOTE := menhirLib.ml menhirLib.mli parser.ml parser.mli

.PHONY: demote-menhir
demote-menhir:
	git checkout HEAD -- $(addprefix boot/menhir/,$(DEMOTE))

## test-menhir

# This rule assumes that the `parsing/` sources and its dependencies
# have already been compiled; 'make core' suffices to be in that
# state. We don't make 'core' an explicit dependency, as building
# 'test-menhir' repeatedly would rebuild the compiler each time
# (parser.ml has changed), without actually taking the changes from
# parser.mly into account ('core' uses the parser from boot/).

# The test-menhir target does not read or write the boot directory,
# it directly builds the parser in parsing/. In particular, it must
# duplicate the MenhirLib->CamlinternalMenhirLib renaming usually
# performed by the parsing/parser.ml import rule in the main
# Makefile.
.PHONY: test-menhir
test-menhir: parsing/parser.mly
	$(MENHIR) $(MENHIRFLAGS) parsing/parser.mly
# Rename MenhirLib to CamlinternalMenhirLib in the generated files. Each
# file is rewritten through a temporary file, and the loop stops at the
# first failure instead of reporting only the status of the last file.
	for f in $(addprefix parsing/parser.,ml mli) ; do \
	  sed "s/MenhirLib/CamlinternalMenhirLib/g" $$f > $$f.tmp && \
	  mv $$f.tmp $$f || exit 1 ; \
	done
	$(MAKE) parsing/parser.cmo

## clean-menhir

# partialclean-menhir is a double-colon rule, so other makefiles may attach
# further recipes to it.
.PHONY: partialclean-menhir clean-menhir
partialclean-menhir::
	rm -f \
	  $(addprefix parsing/parser.,ml mli) \
	  $(addprefix parsing/camlinternalMenhirLib.,ml mli) \
	  $(addprefix parsing/parser.,automaton conflicts) \
	  $(addprefix parsing/parser.,auto.messages)

clean-menhir: partialclean-menhir

## depend-menhir

# The following rule depends on the OCAMLDEP_CMD variable defined in
# Makefile.common, so it can only be invoked from the main (root) Makefile

# The output is first written to a temporary file, so that a failing Menhir
# run does not truncate the existing .depend.menhir.
.PHONY: depend-menhir
depend-menhir:
	$(MENHIR) --depend --ocamldep "$(OCAMLDEP_CMD)" \
	  parsing/parser.mly > .depend.menhir.tmp \
	  && mv .depend.menhir.tmp .depend.menhir \
	  || { rm -f .depend.menhir.tmp ; exit 1 ; }

include .depend.menhir

## interpret-menhir

# This rule runs Menhir in interactive mode.
# The user can enter sentences, such as:
#   implementation: TYPE LIDENT EQUAL LIDENT EOF
# and see how Menhir interprets them.

.PHONY: interpret-menhir
interpret-menhir:
	@ echo "Please wait, I am building the LALR automaton..."
	@ $(MENHIR) $(MENHIRBASICFLAGS) parsing/parser.mly \
	    --interpret \
	    --interpret-show-cst \
	    --trace

## list-parse-errors

# This rule runs Menhir's reachability analysis, which produces a list of all
# states where a syntax error can be detected (and a corresponding list of
# erroneous sentences). This data is stored in parsing/parser.auto.messages.
# This analysis requires about 3 minutes and 6GB of RAM.

# The analysis is performed on a copy of the grammar where every block
# of text comprised between the markers BEGIN AVOID and END AVOID has
# been removed. This allows us to avoid certain syntactic forms in the
# sentences that we produce. See parser.mly for more explanations.
# Because of this, we must run Menhir twice: once on a modified copy of the
# grammar to produce the sentences, and once on the original grammar to update
# the auto-comments (which would otherwise be incorrect).

# The temporary directory holding the modified grammar is removed by an
# EXIT trap, so it is cleaned up whether or not sed and Menhir succeed.
.PHONY: list-parse-errors
list-parse-errors:
	@ tmp=`mktemp -d /tmp/parser.XXXX` || exit 1 ; \
	  trap 'rm -rf "$$tmp"' EXIT ; \
	  sed -e '/BEGIN AVOID/,/END AVOID/d' \
	    parsing/parser.mly > "$$tmp/parser.mly" && \
	  $(MENHIR) $(MENHIRBASICFLAGS) "$$tmp/parser.mly" \
	    --list-errors -la 2 \
	    > parsing/parser.auto.messages
	@ cp parsing/parser.auto.messages parsing/parser.auto.messages.bak
	@ $(MENHIR) $(MENHIRBASICFLAGS) parsing/parser.mly \
	    --update-errors parsing/parser.auto.messages.bak \
	    > parsing/parser.auto.messages
	@ rm -f parsing/parser.auto.messages.bak

## generate-parse-errors

# This rule assumes that [make list-parse-errors] has been run first.

# This rule turns the error sentences stored in parsing/parser.auto.messages
# into one .ml file.

# (It would in principle be preferable to create one file per sentence, but
# that would be much slower. We abuse the ability of the OCaml toplevel to
# resynchronize after an error, and put all sentences into a single file.)

# This requires Menhir 20201214 or newer.

# The test header is emitted with printf rather than with echo "...\n...",
# because shells disagree on whether echo interprets backslash escapes.
# The confirmation prompt below still relies on shell extensions
# (read -n, [[ ... =~ ... ]]).
# The sub-makes are invoked through $(MAKE), not a literal 'make'.

GPE_DIR := tests/generated-parse-errors
GPE_ML := errors.ml
GPE_REF := errors.compilers.reference
GPE_START := implementation use_file toplevel_phrase

.PHONY: generate-parse-errors
generate-parse-errors:
	@ \
	mkdir -p testsuite/$(GPE_DIR) && \
	$(MENHIR) $(MENHIRBASICFLAGS) parsing/parser.mly \
	    --echo-errors-concrete parsing/parser.auto.messages 2>/dev/null | \
	  (cd testsuite/$(GPE_DIR) && touch $(GPE_REF) && ( \
	  printf '%s\n' '(* TEST' ' * toplevel' '*)' && \
	  while IFS= read -r symbolic ; do \
	    IFS= read -r concrete ; \
	    concrete=$${concrete#### Concrete syntax: } ; \
	    : '$$symbolic is the sentence in symbolic form' ; \
	    : '$$concrete is the sentence in concrete form' ; \
	    case "$$symbolic" in \
	    *": SEMISEMI"*) \
	      : 'If the sentence begins with SEMISEMI, ignore it. Our hack' ; \
	      : 'does not support these sentences, and there are only 6 of' ; \
	      : 'them anyway.' ; \
	      continue ;; \
	    *) \
	      case "$$symbolic" in \
	      *"EOF") \
	        : 'If the sentence ends with EOF, replace it on the fly' ; \
	        : 'with some other token (say, WHEN).' ; \
	        echo "#0 \"$${symbolic%%EOF}WHEN\"" ; \
	        echo "$$concrete when" ; \
	        echo ";;" ;; \
	      *) \
	        : 'Emit a # directive containing the symbolic sentence.' ; \
	        echo "#0 \"$$symbolic\"" ; \
	        : 'Emit the concrete sentence.' ; \
	        echo "$$concrete" ; \
	        : 'Emit a double semicolon to allow resynchronization.' ; \
	        echo ";;" ;; \
	      esac \
	    esac \
	  done) \
	  > $(GPE_ML) && \
	  : 'Count how many sentences we have emitted, per start symbol.' ; \
	  for symbol in $(GPE_START) ; do \
	    count=$$(grep -h -e "$$symbol:" $(GPE_ML) | wc -l) && \
	    echo "$$count sentences whose start symbol is $$symbol." ; \
	  done \
	  )
	@ \
	read -p "Re-generate the expected output for this test? " -n 1 -r && \
	echo && \
	if [[ $$REPLY =~ ^[Yy]$$ ]] ; then \
	  $(MAKE) -C testsuite promote DIR=$(GPE_DIR) >/dev/null 2>&1 && \
	  echo "Done." ; \
	  $(MAKE) classify-parse-errors ; \
	else \
	  echo "OK, stop." ; \
	fi

# classify-parse-errors prints a summary of the expected output stored in
# testsuite/$(GPE_DIR)/$(GPE_REF): the number of syntax errors reported, and
# how many of them carry no explanation, an indication of what was expected,
# or an indication of an unmatched delimiter.
# (grep | wc -l is used rather than grep -c, whose non-zero exit status on
# an empty match would interrupt the && chain.)
.PHONY: classify-parse-errors
classify-parse-errors:
	@ ( \
	cd testsuite/$(GPE_DIR) && \
	echo "The parser's output can be described as follows:" && \
	c=$$(grep "^Error: Syntax error" $(GPE_REF) | wc -l) && \
	echo "$${c} syntax errors reported." && \
	c=$$(grep "^Error: Syntax error$$" $(GPE_REF) | wc -l) && \
	echo "$${c} errors without an explanation." && \
	c=$$(grep "^Error: Syntax" $(GPE_REF) | grep expected | wc -l) && \
	echo "$${c} errors with an indication of what was expected." && \
	c=$$(grep "might be unmatched" $(GPE_REF) | wc -l) && \
	echo "$${c} errors with an indication of an unmatched delimiter." && \
	true)