diff options
author | Anirudh Prasad <anirudh_prasad@hotmail.com> | 2021-04-28 15:42:23 -0400 |
---|---|---|
committer | Anirudh Prasad <anirudh_prasad@hotmail.com> | 2021-04-28 15:43:24 -0400 |
commit | 07b0a72d8e96450336fcd81069f14911d8965db2 (patch) | |
tree | 0ecc34d7b57716f2bd39619b03b307db6aa5cc5a | |
parent | a836de0bdef2ed25e46bd304f3a53a1f08be51c4 (diff) | |
download | llvm-07b0a72d8e96450336fcd81069f14911d8965db2.tar.gz |
[AsmParser][SystemZ][z/OS] Use updated framework in AsmLexer to accept special tokens as Identifiers
- Previously, https://reviews.llvm.org/D99889 changed the framework in the AsmLexer to treat special tokens, if they occur at the start of the string, as Identifiers.
- That framework is currently used by the MASM Parser implementation in LLVM, and we can extend some of the changes made in the previous patch to SystemZ.
- In SystemZ, the special "tokens" referred to here are "_", "$", "@", "#". [_|$|@|#] are already supported as "part" of an Identifier.
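- The changes in this patch ensure that, for the HLASM dialect, a string that starts with one of these special tokens is lexed as an Identifier, unless the AsmLexer determines that the string can be lexed as a comment (in which case it is still lexed as a comment).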
Reviewed By: abhina.sreeskantharajan
Differential Revision: https://reviews.llvm.org/D100959
-rw-r--r-- | llvm/include/llvm/MC/MCAsmInfo.h | 28 | ||||
-rw-r--r-- | llvm/lib/MC/MCParser/AsmLexer.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp | 4 | ||||
-rw-r--r-- | llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp | 84 |
4 files changed, 112 insertions, 10 deletions
diff --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h index b674af072ed5..f72cc709389d 100644 --- a/llvm/include/llvm/MC/MCAsmInfo.h +++ b/llvm/include/llvm/MC/MCAsmInfo.h @@ -183,25 +183,32 @@ protected: /// This is true if the assembler allows the "?" character at the start of /// of a string to be lexed as an AsmToken::Identifier. - /// If the CommentString is also set to "?", setting this option will have - /// no effect, and the string will be lexed as a comment. - /// Defaults to false. + /// If the AsmLexer determines that the string can be lexed as a possible + /// comment, setting this option will have no effect, and the string will + /// still be lexed as a comment. bool AllowQuestionAtStartOfIdentifier = false; /// This is true if the assembler allows the "$" character at the start of /// of a string to be lexed as an AsmToken::Identifier. - /// If the CommentString is also set to "$", setting this option will have - /// no effect, and the string will be lexed as a comment. - /// Defaults to false. + /// If the AsmLexer determines that the string can be lexed as a possible + /// comment, setting this option will have no effect, and the string will + /// still be lexed as a comment. bool AllowDollarAtStartOfIdentifier = false; /// This is true if the assembler allows the "@" character at the start of /// a string to be lexed as an AsmToken::Identifier. - /// If the CommentString is also set to "@", setting this option will have - /// no effect, and the string will be lexed as a comment. - /// Defaults to false. + /// If the AsmLexer determines that the string can be lexed as a possible + /// comment, setting this option will have no effect, and the string will + /// still be lexed as a comment. bool AllowAtAtStartOfIdentifier = false; + /// This is true if the assembler allows the "#" character at the start of + /// a string to be lexed as an AsmToken::Identifier. 
+ /// If the AsmLexer determines that the string can be lexed as a possible + /// comment, setting this option will have no effect, and the string will + /// still be lexed as a comment. + bool AllowHashAtStartOfIdentifier = false; + /// If this is true, symbol names with invalid characters will be printed in /// quotes. bool SupportsQuotedNames = true; @@ -630,6 +637,9 @@ public: bool doesAllowDollarAtStartOfIdentifier() const { return AllowDollarAtStartOfIdentifier; } + bool doesAllowHashAtStartOfIdentifier() const { + return AllowHashAtStartOfIdentifier; + } bool supportsNameQuoting() const { return SupportsQuotedNames; } bool doesSupportDataRegionDirectives() const { diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp index 1c33147aa1e7..28aa5631afe8 100644 --- a/llvm/lib/MC/MCParser/AsmLexer.cpp +++ b/llvm/lib/MC/MCParser/AsmLexer.cpp @@ -907,7 +907,11 @@ AsmToken AsmLexer::LexToken() { case '/': IsAtStartOfStatement = OldIsAtStartOfStatement; return LexSlash(); - case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); + case '#': { + if (MAI.doesAllowHashAtStartOfIdentifier()) + return LexIdentifier(); + return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); + } case '\'': return LexSingleQuote(); case '"': return LexQuote(); case '0': case '1': case '2': case '3': case '4': diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp index 0c7a1338aaa2..4bb9e15d4c4a 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp @@ -24,6 +24,10 @@ SystemZMCAsmInfo::SystemZMCAsmInfo(const Triple &TT) { CommentString = AssemblerDialect == AD_HLASM ? 
"*" : "#"; RestrictCommentStringToStartOfStatement = (AssemblerDialect == AD_HLASM); AllowAdditionalComments = (AssemblerDialect == AD_ATT); + AllowAtAtStartOfIdentifier = (AssemblerDialect == AD_HLASM); + AllowDollarAtStartOfIdentifier = (AssemblerDialect == AD_HLASM); + AllowHashAtStartOfIdentifier = (AssemblerDialect == AD_HLASM); + ZeroDirective = "\t.space\t"; Data64bitsDirective = "\t.quad\t"; UsesELFSectionDirectiveForBSS = true; diff --git a/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp b/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp index 12074ddcbd19..81180966708e 100644 --- a/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp +++ b/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp @@ -44,6 +44,9 @@ public: void setAllowDollarAtStartOfIdentifier(bool Value) { AllowDollarAtStartOfIdentifier = Value; } + void setAllowHashAtStartOfIdentifier(bool Value) { + AllowHashAtStartOfIdentifier = Value; + } }; // Setup a testing class that the GTest framework can call. @@ -571,4 +574,85 @@ TEST_F(SystemZAsmLexerTest, CheckAcceptDollarAtStartOfIdentifier) { {AsmToken::Identifier, AsmToken::EndOfStatement, AsmToken::Eof}); lexAndCheckTokens(AsmStr, ExpectedTokens); } + +TEST_F(SystemZAsmLexerTest, CheckAcceptHashAtStartOfIdentifier) { + StringRef AsmStr = "##a#b$c"; + + // Setup. + MUPMAI->setAllowHashAtStartOfIdentifier(true); + MUPMAI->setCommentString("*"); + MUPMAI->setAllowAdditionalComments(false); + setupCallToAsmParser(AsmStr); + Parser->getLexer().setAllowHashInIdentifier(true); + + // Lex initially to get the string. + Parser->getLexer().Lex(); + + SmallVector<AsmToken::TokenKind> ExpectedTokens( + {AsmToken::Identifier, AsmToken::EndOfStatement, AsmToken::Eof}); + lexAndCheckTokens(AsmStr, ExpectedTokens); +} + +TEST_F(SystemZAsmLexerTest, CheckAcceptHashAtStartOfIdentifier2) { + StringRef AsmStr = "##a#b$c"; + + // Setup. 
+ MUPMAI->setAllowHashAtStartOfIdentifier(true); + setupCallToAsmParser(AsmStr); + Parser->getLexer().setAllowHashInIdentifier(true); + + // Lex initially to get the string. + Parser->getLexer().Lex(); + + // By default, the CommentString attribute is set to "#". + // Hence, "##a#b$c" is lexed as a line comment irrespective + // of whether the AllowHashAtStartOfIdentifier attribute is set to true. + SmallVector<AsmToken::TokenKind> ExpectedTokens( + {AsmToken::EndOfStatement, AsmToken::Eof}); + lexAndCheckTokens(AsmStr, ExpectedTokens); +} + +TEST_F(SystemZAsmLexerTest, CheckAcceptHashAtStartOfIdentifier3) { + StringRef AsmStr = "##a#b$c"; + + // Setup. + MUPMAI->setAllowHashAtStartOfIdentifier(true); + MUPMAI->setCommentString("*"); + setupCallToAsmParser(AsmStr); + Parser->getLexer().setAllowHashInIdentifier(true); + + // Lex initially to get the string. + Parser->getLexer().Lex(); + + // By default, the AsmLexer treats strings that start with "#" + // as a line comment. + // Hence, "##a$b$c" is lexed as a line comment irrespective + // of whether the AllowHashAtStartOfIdentifier attribute is set to true. + SmallVector<AsmToken::TokenKind> ExpectedTokens( + {AsmToken::EndOfStatement, AsmToken::Eof}); + lexAndCheckTokens(AsmStr, ExpectedTokens); +} + +TEST_F(SystemZAsmLexerTest, CheckAcceptHashAtStartOfIdentifier4) { + StringRef AsmStr = "##a#b$c"; + + // Setup. + MUPMAI->setAllowHashAtStartOfIdentifier(true); + MUPMAI->setCommentString("*"); + MUPMAI->setAllowAdditionalComments(false); + setupCallToAsmParser(AsmStr); + Parser->getLexer().setAllowHashInIdentifier(true); + + // Lex initially to get the string. + Parser->getLexer().Lex(); + + // Since, the AllowAdditionalComments attribute is set to false, + // only strings starting with the CommentString attribute are + // lexed as possible comments. + // Hence, "##a$b$c" is lexed as an Identifier because the + // AllowHashAtStartOfIdentifier attribute is set to true. 
+ SmallVector<AsmToken::TokenKind> ExpectedTokens( + {AsmToken::Identifier, AsmToken::EndOfStatement, AsmToken::Eof}); + lexAndCheckTokens(AsmStr, ExpectedTokens); +} } // end anonymous namespace |