summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnirudh Prasad <anirudh_prasad@hotmail.com>2021-04-28 15:42:23 -0400
committerAnirudh Prasad <anirudh_prasad@hotmail.com>2021-04-28 15:43:24 -0400
commit07b0a72d8e96450336fcd81069f14911d8965db2 (patch)
tree0ecc34d7b57716f2bd39619b03b307db6aa5cc5a
parenta836de0bdef2ed25e46bd304f3a53a1f08be51c4 (diff)
downloadllvm-07b0a72d8e96450336fcd81069f14911d8965db2.tar.gz
[AsmParser][SystemZ][z/OS] Use updated framework in AsmLexer to accept special tokens as Identifiers
- Previously, https://reviews.llvm.org/D99889 changed the framework in the AsmLexer to treat special tokens, if they occur at the start of the string, as Identifiers. - These are used by the MASM Parser implementation in LLVM, and we can extend some of the changes made in the previous patch to SystemZ. - In SystemZ, the special "tokens" referred to here are "_", "$", "@", "#". [_|$|@|#] are already supported as "part" of an Identifier. - The changes in this patch ensure that these special tokens, when they occur at the start of the Identifier, are treated as Identifiers. Reviewed By: abhina.sreeskantharajan Differential Revision: https://reviews.llvm.org/D100959
-rw-r--r--llvm/include/llvm/MC/MCAsmInfo.h28
-rw-r--r--llvm/lib/MC/MCParser/AsmLexer.cpp6
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp4
-rw-r--r--llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp84
4 files changed, 112 insertions, 10 deletions
diff --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h
index b674af072ed5..f72cc709389d 100644
--- a/llvm/include/llvm/MC/MCAsmInfo.h
+++ b/llvm/include/llvm/MC/MCAsmInfo.h
@@ -183,25 +183,32 @@ protected:
/// This is true if the assembler allows the "?" character at the start of
/// a string to be lexed as an AsmToken::Identifier.
- /// If the CommentString is also set to "?", setting this option will have
- /// no effect, and the string will be lexed as a comment.
- /// Defaults to false.
+ /// If the AsmLexer determines that the string can be lexed as a possible
+ /// comment, setting this option will have no effect, and the string will
+ /// still be lexed as a comment.
bool AllowQuestionAtStartOfIdentifier = false;
/// This is true if the assembler allows the "$" character at the start of
/// a string to be lexed as an AsmToken::Identifier.
- /// If the CommentString is also set to "$", setting this option will have
- /// no effect, and the string will be lexed as a comment.
- /// Defaults to false.
+ /// If the AsmLexer determines that the string can be lexed as a possible
+ /// comment, setting this option will have no effect, and the string will
+ /// still be lexed as a comment.
bool AllowDollarAtStartOfIdentifier = false;
/// This is true if the assembler allows the "@" character at the start of
/// a string to be lexed as an AsmToken::Identifier.
- /// If the CommentString is also set to "@", setting this option will have
- /// no effect, and the string will be lexed as a comment.
- /// Defaults to false.
+ /// If the AsmLexer determines that the string can be lexed as a possible
+ /// comment, setting this option will have no effect, and the string will
+ /// still be lexed as a comment.
bool AllowAtAtStartOfIdentifier = false;
+ /// This is true if the assembler allows the "#" character at the start of
+ /// a string to be lexed as an AsmToken::Identifier.
+ /// If the AsmLexer determines that the string can be lexed as a possible
+ /// comment, setting this option will have no effect, and the string will
+ /// still be lexed as a comment.
+ bool AllowHashAtStartOfIdentifier = false;
+
/// If this is true, symbol names with invalid characters will be printed in
/// quotes.
bool SupportsQuotedNames = true;
@@ -630,6 +637,9 @@ public:
bool doesAllowDollarAtStartOfIdentifier() const {
return AllowDollarAtStartOfIdentifier;
}
+ bool doesAllowHashAtStartOfIdentifier() const {
+ return AllowHashAtStartOfIdentifier;
+ }
bool supportsNameQuoting() const { return SupportsQuotedNames; }
bool doesSupportDataRegionDirectives() const {
diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
index 1c33147aa1e7..28aa5631afe8 100644
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -907,7 +907,11 @@ AsmToken AsmLexer::LexToken() {
case '/':
IsAtStartOfStatement = OldIsAtStartOfStatement;
return LexSlash();
- case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
+ case '#': {
+ if (MAI.doesAllowHashAtStartOfIdentifier())
+ return LexIdentifier();
+ return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
+ }
case '\'': return LexSingleQuote();
case '"': return LexQuote();
case '0': case '1': case '2': case '3': case '4':
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
index 0c7a1338aaa2..4bb9e15d4c4a 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
@@ -24,6 +24,10 @@ SystemZMCAsmInfo::SystemZMCAsmInfo(const Triple &TT) {
CommentString = AssemblerDialect == AD_HLASM ? "*" : "#";
RestrictCommentStringToStartOfStatement = (AssemblerDialect == AD_HLASM);
AllowAdditionalComments = (AssemblerDialect == AD_ATT);
+ AllowAtAtStartOfIdentifier = (AssemblerDialect == AD_HLASM);
+ AllowDollarAtStartOfIdentifier = (AssemblerDialect == AD_HLASM);
+ AllowHashAtStartOfIdentifier = (AssemblerDialect == AD_HLASM);
+
ZeroDirective = "\t.space\t";
Data64bitsDirective = "\t.quad\t";
UsesELFSectionDirectiveForBSS = true;
diff --git a/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp b/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp
index 12074ddcbd19..81180966708e 100644
--- a/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp
+++ b/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp
@@ -44,6 +44,9 @@ public:
void setAllowDollarAtStartOfIdentifier(bool Value) {
AllowDollarAtStartOfIdentifier = Value;
}
+ void setAllowHashAtStartOfIdentifier(bool Value) {
+ AllowHashAtStartOfIdentifier = Value;
+ }
};
// Setup a testing class that the GTest framework can call.
@@ -571,4 +574,85 @@ TEST_F(SystemZAsmLexerTest, CheckAcceptDollarAtStartOfIdentifier) {
{AsmToken::Identifier, AsmToken::EndOfStatement, AsmToken::Eof});
lexAndCheckTokens(AsmStr, ExpectedTokens);
}
+
+TEST_F(SystemZAsmLexerTest, CheckAcceptHashAtStartOfIdentifier) {
+ StringRef AsmStr = "##a#b$c";
+
+ // Setup.
+ MUPMAI->setAllowHashAtStartOfIdentifier(true);
+ MUPMAI->setCommentString("*");
+ MUPMAI->setAllowAdditionalComments(false);
+ setupCallToAsmParser(AsmStr);
+ Parser->getLexer().setAllowHashInIdentifier(true);
+
+ // Lex initially to get the string.
+ Parser->getLexer().Lex();
+
+ SmallVector<AsmToken::TokenKind> ExpectedTokens(
+ {AsmToken::Identifier, AsmToken::EndOfStatement, AsmToken::Eof});
+ lexAndCheckTokens(AsmStr, ExpectedTokens);
+}
+
+TEST_F(SystemZAsmLexerTest, CheckAcceptHashAtStartOfIdentifier2) {
+ StringRef AsmStr = "##a#b$c";
+
+ // Setup.
+ MUPMAI->setAllowHashAtStartOfIdentifier(true);
+ setupCallToAsmParser(AsmStr);
+ Parser->getLexer().setAllowHashInIdentifier(true);
+
+ // Lex initially to get the string.
+ Parser->getLexer().Lex();
+
+ // By default, the CommentString attribute is set to "#".
+ // Hence, "##a#b$c" is lexed as a line comment irrespective
+ // of whether the AllowHashAtStartOfIdentifier attribute is set to true.
+ SmallVector<AsmToken::TokenKind> ExpectedTokens(
+ {AsmToken::EndOfStatement, AsmToken::Eof});
+ lexAndCheckTokens(AsmStr, ExpectedTokens);
+}
+
+TEST_F(SystemZAsmLexerTest, CheckAcceptHashAtStartOfIdentifier3) {
+ StringRef AsmStr = "##a#b$c";
+
+ // Setup.
+ MUPMAI->setAllowHashAtStartOfIdentifier(true);
+ MUPMAI->setCommentString("*");
+ setupCallToAsmParser(AsmStr);
+ Parser->getLexer().setAllowHashInIdentifier(true);
+
+ // Lex initially to get the string.
+ Parser->getLexer().Lex();
+
+ // By default, the AsmLexer treats strings that start with "#"
+ // as a line comment.
+ // Hence, "##a#b$c" is lexed as a line comment irrespective
+ // of whether the AllowHashAtStartOfIdentifier attribute is set to true.
+ SmallVector<AsmToken::TokenKind> ExpectedTokens(
+ {AsmToken::EndOfStatement, AsmToken::Eof});
+ lexAndCheckTokens(AsmStr, ExpectedTokens);
+}
+
+TEST_F(SystemZAsmLexerTest, CheckAcceptHashAtStartOfIdentifier4) {
+ StringRef AsmStr = "##a#b$c";
+
+ // Setup.
+ MUPMAI->setAllowHashAtStartOfIdentifier(true);
+ MUPMAI->setCommentString("*");
+ MUPMAI->setAllowAdditionalComments(false);
+ setupCallToAsmParser(AsmStr);
+ Parser->getLexer().setAllowHashInIdentifier(true);
+
+ // Lex initially to get the string.
+ Parser->getLexer().Lex();
+
+ // Since the AllowAdditionalComments attribute is set to false,
+ // only strings starting with the CommentString attribute are
+ // lexed as possible comments.
+ // Hence, "##a#b$c" is lexed as an Identifier because the
+ // AllowHashAtStartOfIdentifier attribute is set to true.
+ SmallVector<AsmToken::TokenKind> ExpectedTokens(
+ {AsmToken::Identifier, AsmToken::EndOfStatement, AsmToken::Eof});
+ lexAndCheckTokens(AsmStr, ExpectedTokens);
+}
} // end anonymous namespace