summaryrefslogtreecommitdiff
path: root/clang-tools-extra/pseudo
diff options
context:
space:
mode:
authorSam McCall <sam.mccall@gmail.com>2022-07-22 11:28:51 +0200
committerSam McCall <sam.mccall@gmail.com>2022-07-25 12:57:07 +0200
commitb2b993a6ae675955b1112473c035ae6b4e3932a0 (patch)
treeba24a9904d8aaa77b725b04ed64b6ec584541974 /clang-tools-extra/pseudo
parent1defec87306593e71057de7baf9bd8e2389c2419 (diff)
downloadllvm-b2b993a6ae675955b1112473c035ae6b4e3932a0.tar.gz
[pseudo] Eliminate multiple-specified-types ambiguities using guards
Motivating case: `foo bar;` is not a declaration of nothing with `foo` and `bar` both types. This is a common and critical ambiguity, clangd/AST.cpp has 20% fewer ambiguous nodes (1674->1332) after this change. Differential Revision: https://reviews.llvm.org/D130337
Diffstat (limited to 'clang-tools-extra/pseudo')
-rw-r--r--clang-tools-extra/pseudo/lib/cxx/CXX.cpp121
-rw-r--r--clang-tools-extra/pseudo/lib/cxx/cxx.bnf27
-rw-r--r--clang-tools-extra/pseudo/test/cxx/decl-specfier-seq.cpp27
-rw-r--r--clang-tools-extra/pseudo/test/fuzzer.cpp2
4 files changed, 159 insertions, 18 deletions
diff --git a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
index 7fc3a48d6318..73af0a7a2d52 100644
--- a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
+++ b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
@@ -14,7 +14,9 @@
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/Debug.h"
#include <utility>
+#define DEBUG_TYPE "CXX.cpp"
namespace clang {
namespace pseudo {
@@ -160,7 +162,106 @@ bool guardNextTokenNotElse(const GuardParams &P) {
return symbolToToken(P.Lookahead) != tok::kw_else;
}
+// Whether this e.g. decl-specifier contains an "exclusive" type such as a class
+// name, and thus can't combine with a second exclusive type.
+//
+// Returns false for
+// - non-types
+// - "unsigned" etc that may suffice as types but may modify others
+// - cases of uncertainty (e.g. due to ambiguity)
+bool hasExclusiveType(const ForestNode *N) {
+ // FIXME: every time we apply this check, we walk the whole subtree.
+ // Add per-node caching instead.
+ while (true) {
+ assert(N->symbol() == (SymbolID)Symbol::decl_specifier_seq ||
+ N->symbol() == (SymbolID)Symbol::type_specifier_seq ||
+ N->symbol() == (SymbolID)Symbol::defining_type_specifier_seq ||
+ N->symbol() == (SymbolID)Symbol::decl_specifier ||
+ N->symbol() == (SymbolID)Symbol::type_specifier ||
+ N->symbol() == (SymbolID)Symbol::defining_type_specifier ||
+ N->symbol() == (SymbolID)Symbol::simple_type_specifier);
+ if (N->kind() == ForestNode::Opaque)
+ return false; // conservative
+ if (N->kind() == ForestNode::Ambiguous)
+ return llvm::all_of(N->alternatives(), hasExclusiveType); // conservative
+ // All supported symbols are nonterminals.
+ assert(N->kind() == ForestNode::Sequence);
+ switch (N->rule()) {
+ // seq := element seq: check element then continue into seq
+ case (RuleID)Rule::decl_specifier_seq_0decl_specifier_1decl_specifier_seq:
+ case (RuleID)Rule::defining_type_specifier_seq_0defining_type_specifier_1defining_type_specifier_seq:
+ case (RuleID)Rule::type_specifier_seq_0type_specifier_1type_specifier_seq:
+ if (hasExclusiveType(N->children()[0]))
+ return true;
+ N = N->children()[1];
+ continue;
+ // seq := element: continue into element
+ case (RuleID)Rule::decl_specifier_seq_0decl_specifier:
+ case (RuleID)Rule::type_specifier_seq_0type_specifier:
+ case (RuleID)Rule::defining_type_specifier_seq_0defining_type_specifier:
+ N = N->children()[0];
+ continue;
+
+ // defining-type-specifier
+ case (RuleID)Rule::defining_type_specifier_0type_specifier:
+ N = N->children()[0];
+ continue;
+ case (RuleID)Rule::defining_type_specifier_0class_specifier:
+ case (RuleID)Rule::defining_type_specifier_0enum_specifier:
+ return true;
+
+ // decl-specifier
+ case (RuleID)Rule::decl_specifier_0defining_type_specifier:
+ N = N->children()[0];
+ continue;
+ case (RuleID)Rule::decl_specifier_0consteval:
+ case (RuleID)Rule::decl_specifier_0constexpr:
+ case (RuleID)Rule::decl_specifier_0constinit:
+ case (RuleID)Rule::decl_specifier_0inline:
+ case (RuleID)Rule::decl_specifier_0friend:
+ case (RuleID)Rule::decl_specifier_0storage_class_specifier:
+ case (RuleID)Rule::decl_specifier_0typedef:
+ case (RuleID)Rule::decl_specifier_0function_specifier:
+ return false;
+
+ // type-specifier
+ case (RuleID)Rule::type_specifier_0elaborated_type_specifier:
+ case (RuleID)Rule::type_specifier_0typename_specifier:
+ return true;
+ case (RuleID)Rule::type_specifier_0simple_type_specifier:
+ N = N->children()[0];
+ continue;
+ case (RuleID)Rule::type_specifier_0cv_qualifier:
+ return false;
+
+ // simple-type-specifier
+ case (RuleID)Rule::simple_type_specifier_0type_name:
+ case (RuleID)Rule::simple_type_specifier_0template_name:
+ case (RuleID)Rule::simple_type_specifier_0builtin_type:
+ case (RuleID)Rule::simple_type_specifier_0nested_name_specifier_1template_2simple_template_id:
+ case (RuleID)Rule::simple_type_specifier_0nested_name_specifier_1template_name:
+ case (RuleID)Rule::simple_type_specifier_0nested_name_specifier_1type_name:
+ case (RuleID)Rule::simple_type_specifier_0decltype_specifier:
+ case (RuleID)Rule::simple_type_specifier_0placeholder_type_specifier:
+ return true;
+ case (RuleID)Rule::simple_type_specifier_0long:
+ case (RuleID)Rule::simple_type_specifier_0short:
+ case (RuleID)Rule::simple_type_specifier_0signed:
+ case (RuleID)Rule::simple_type_specifier_0unsigned:
+ return false;
+
+ default:
+ LLVM_DEBUG(llvm::errs() << "Unhandled rule " << N->rule() << "\n");
+ llvm_unreachable("hasExclusiveType be exhaustive!");
+ }
+ }
+}
+
llvm::DenseMap<ExtensionID, RuleGuard> buildGuards() {
+#define GUARD(cond) \
+ { \
+ [](const GuardParams &P) { return cond; } \
+ }
#define TOKEN_GUARD(kind, cond) \
[](const GuardParams& P) { \
const Token &Tok = onlyToken(tok::kind, P.RHS, P.Tokens); \
@@ -177,6 +278,16 @@ llvm::DenseMap<ExtensionID, RuleGuard> buildGuards() {
{(RuleID)Rule::non_function_declarator_0declarator,
SYMBOL_GUARD(declarator, !isFunctionDeclarator(&N))},
+ // A {decl,type,defining-type}-specifier-sequence cannot have multiple
+ // "exclusive" types (like class names): a value has only one type.
+ {(RuleID)Rule::
+ defining_type_specifier_seq_0defining_type_specifier_1defining_type_specifier_seq,
+ GUARD(!hasExclusiveType(P.RHS[0]) || !hasExclusiveType(P.RHS[1]))},
+ {(RuleID)Rule::type_specifier_seq_0type_specifier_1type_specifier_seq,
+ GUARD(!hasExclusiveType(P.RHS[0]) || !hasExclusiveType(P.RHS[1]))},
+ {(RuleID)Rule::decl_specifier_seq_0decl_specifier_1decl_specifier_seq,
+ GUARD(!hasExclusiveType(P.RHS[0]) || !hasExclusiveType(P.RHS[1]))},
+
{(RuleID)Rule::contextual_override_0identifier,
TOKEN_GUARD(identifier, Tok.text() == "override")},
{(RuleID)Rule::contextual_final_0identifier,
@@ -190,10 +301,12 @@ llvm::DenseMap<ExtensionID, RuleGuard> buildGuards() {
{(RuleID)Rule::contextual_zero_0numeric_constant,
TOKEN_GUARD(numeric_constant, Tok.text() == "0")},
- {(RuleID)Rule::selection_statement_0if_1l_paren_2condition_3r_paren_4statement,
- guardNextTokenNotElse},
- {(RuleID)Rule::selection_statement_0if_1constexpr_2l_paren_3condition_4r_paren_5statement,
- guardNextTokenNotElse},
+ {(RuleID)Rule::
+ selection_statement_0if_1l_paren_2condition_3r_paren_4statement,
+ guardNextTokenNotElse},
+ {(RuleID)Rule::
+ selection_statement_0if_1constexpr_2l_paren_3condition_4r_paren_5statement,
+ guardNextTokenNotElse},
// The grammar distinguishes (only) user-defined vs plain string literals,
// where the clang lexer distinguishes (only) encoding types.
diff --git a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf
index ca8fc01b8f41..8dfab8b9788d 100644
--- a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf
+++ b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf
@@ -350,7 +350,7 @@ decl-specifier := CONSTEVAL
decl-specifier := CONSTINIT
decl-specifier := INLINE
decl-specifier-seq := decl-specifier
-decl-specifier-seq := decl-specifier decl-specifier-seq
+decl-specifier-seq := decl-specifier decl-specifier-seq [guard]
storage-class-specifier := STATIC
storage-class-specifier := THREAD_LOCAL
storage-class-specifier := EXTERN
@@ -364,31 +364,32 @@ type-specifier := elaborated-type-specifier
type-specifier := typename-specifier
type-specifier := cv-qualifier
type-specifier-seq := type-specifier
-type-specifier-seq := type-specifier type-specifier-seq
+type-specifier-seq := type-specifier type-specifier-seq [guard]
defining-type-specifier := type-specifier
defining-type-specifier := class-specifier
defining-type-specifier := enum-specifier
defining-type-specifier-seq := defining-type-specifier
-defining-type-specifier-seq := defining-type-specifier defining-type-specifier-seq
+defining-type-specifier-seq := defining-type-specifier defining-type-specifier-seq [guard]
simple-type-specifier := nested-name-specifier_opt type-name
simple-type-specifier := nested-name-specifier TEMPLATE simple-template-id
simple-type-specifier := decltype-specifier
simple-type-specifier := placeholder-type-specifier
simple-type-specifier := nested-name-specifier_opt template-name
-simple-type-specifier := CHAR
-simple-type-specifier := CHAR8_T
-simple-type-specifier := CHAR16_T
-simple-type-specifier := CHAR32_T
-simple-type-specifier := WCHAR_T
-simple-type-specifier := BOOL
+simple-type-specifier := builtin-type
+builtin-type := CHAR
+builtin-type := CHAR8_T
+builtin-type := CHAR16_T
+builtin-type := CHAR32_T
+builtin-type := WCHAR_T
+builtin-type := BOOL
simple-type-specifier := SHORT
-simple-type-specifier := INT
+builtin-type := INT
simple-type-specifier := LONG
simple-type-specifier := SIGNED
simple-type-specifier := UNSIGNED
-simple-type-specifier := FLOAT
-simple-type-specifier := DOUBLE
-simple-type-specifier := VOID
+builtin-type := FLOAT
+builtin-type := DOUBLE
+builtin-type := VOID
type-name := class-name
type-name := enum-name
type-name := typedef-name
diff --git a/clang-tools-extra/pseudo/test/cxx/decl-specfier-seq.cpp b/clang-tools-extra/pseudo/test/cxx/decl-specfier-seq.cpp
new file mode 100644
index 000000000000..255e8bedac49
--- /dev/null
+++ b/clang-tools-extra/pseudo/test/cxx/decl-specfier-seq.cpp
@@ -0,0 +1,27 @@
+// RUN: clang-pseudo -grammar=cxx -source=%s --print-forest | FileCheck %s
+
+// not parsed as Type{foo} Type{bar}
+foo bar;
+// CHECK-NOT: simple-declaration := decl-specifier-seq ;
+// CHECK: simple-declaration := decl-specifier-seq init-declarator-list ;
+// CHECK: ├─decl-specifier-seq~simple-type-specifier
+// CHECK: ├─init-declarator-list~IDENTIFIER
+// CHECK: └─;
+// CHECK-NOT: simple-declaration := decl-specifier-seq ;
+
+// not parsed as Type{std} Type{::string} Declarator{s};
+std::string s;
+// CHECK-NOT: nested-name-specifier := ::
+// CHECK: simple-declaration := decl-specifier-seq init-declarator-list ;
+// CHECK: ├─decl-specifier-seq~simple-type-specifier := <ambiguous>
+// CHECK: │ ├─simple-type-specifier := nested-name-specifier type-name
+// CHECK: │ │ ├─nested-name-specifier := <ambiguous> #1
+// CHECK: │ │ │ ├─nested-name-specifier := type-name ::
+// CHECK: │ │ │ └─nested-name-specifier := namespace-name ::
+// CHECK: │ │ └─type-name
+// CHECK: │ └─simple-type-specifier := nested-name-specifier template-name
+// CHECK: │ ├─nested-name-specifier =#1
+// CHECK: │ └─template-name~IDENTIFIER
+// CHECK: ├─init-declarator-list~IDENTIFIER
+// CHECK: └─;
+// CHECK-NOT: nested-name-specifier := ::
diff --git a/clang-tools-extra/pseudo/test/fuzzer.cpp b/clang-tools-extra/pseudo/test/fuzzer.cpp
index 6f4d093ed2ad..400746a9d12d 100644
--- a/clang-tools-extra/pseudo/test/fuzzer.cpp
+++ b/clang-tools-extra/pseudo/test/fuzzer.cpp
@@ -1,4 +1,4 @@
// RUN: clang-pseudo-fuzzer -grammar=%cxx-bnf-file -print %s | FileCheck %s
int x;
// CHECK: translation-unit := declaration-seq
-// CHECK: simple-type-specifier := INT
+// CHECK: builtin-type := INT