diff options
author | Krasimir Georgiev <krasimir@google.com> | 2017-10-30 14:01:50 +0000 |
---|---|---|
committer | Krasimir Georgiev <krasimir@google.com> | 2017-10-30 14:01:50 +0000 |
commit | bd3b05d2cc22423f7eb44ff6bf948b44cc605309 (patch) | |
tree | 9ad996068e7388a2cdb3db0bb9a17beb559813ef /lib | |
parent | 1157fbd7d668f9579b5a633dd245657bb940076b (diff) | |
download | clang-bd3b05d2cc22423f7eb44ff6bf948b44cc605309.tar.gz |
[clang-format] Format raw string literals
Summary:
This patch adds raw string literal formatting.
Reviewers: djasper, klimek
Reviewed By: klimek
Subscribers: klimek, mgorny
Differential Revision: https://reviews.llvm.org/D35943
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@316903 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Format/ContinuationIndenter.cpp | 159 | ||||
-rw-r--r-- | lib/Format/ContinuationIndenter.h | 37 | ||||
-rw-r--r-- | lib/Format/Format.cpp | 96 | ||||
-rw-r--r-- | lib/Format/FormatInternal.h | 79 | ||||
-rw-r--r-- | lib/Format/FormatTokenLexer.cpp | 4 | ||||
-rw-r--r-- | lib/Format/FormatTokenLexer.h | 2 | ||||
-rw-r--r-- | lib/Format/NamespaceEndCommentsFixer.cpp | 4 | ||||
-rw-r--r-- | lib/Format/NamespaceEndCommentsFixer.h | 2 | ||||
-rw-r--r-- | lib/Format/SortJavaScriptImports.cpp | 10 | ||||
-rw-r--r-- | lib/Format/TokenAnalyzer.cpp | 35 | ||||
-rw-r--r-- | lib/Format/TokenAnalyzer.h | 47 | ||||
-rw-r--r-- | lib/Format/TokenAnnotator.cpp | 3 | ||||
-rw-r--r-- | lib/Format/TokenAnnotator.h | 5 | ||||
-rw-r--r-- | lib/Format/UnwrappedLineFormatter.cpp | 69 | ||||
-rw-r--r-- | lib/Format/UnwrappedLineFormatter.h | 8 | ||||
-rw-r--r-- | lib/Format/UnwrappedLineParser.cpp | 9 | ||||
-rw-r--r-- | lib/Format/UnwrappedLineParser.h | 7 | ||||
-rw-r--r-- | lib/Format/UsingDeclarationsSorter.cpp | 4 | ||||
-rw-r--r-- | lib/Format/UsingDeclarationsSorter.h | 2 | ||||
-rw-r--r-- | lib/Format/WhitespaceManager.cpp | 5 | ||||
-rw-r--r-- | lib/Format/WhitespaceManager.h | 2 |
21 files changed, 482 insertions, 107 deletions
diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp index b57b8de2e7..4f624dd2fb 100644 --- a/lib/Format/ContinuationIndenter.cpp +++ b/lib/Format/ContinuationIndenter.cpp @@ -14,6 +14,7 @@ #include "ContinuationIndenter.h" #include "BreakableToken.h" +#include "FormatInternal.h" #include "WhitespaceManager.h" #include "clang/Basic/OperatorPrecedence.h" #include "clang/Basic/SourceManager.h" @@ -76,6 +77,53 @@ static bool opensProtoMessageField(const FormatToken &LessTok, (LessTok.Previous && LessTok.Previous->is(tok::equal)))); } +// Returns the delimiter of a raw string literal, or None if TokenText is not +// the text of a raw string literal. The delimiter could be the empty string. +// For example, the delimiter of R"deli(cont)deli" is deli. +static llvm::Optional<StringRef> getRawStringDelimiter(StringRef TokenText) { + if (TokenText.size() < 5 // The smallest raw string possible is 'R"()"'. + || !TokenText.startswith("R\"") || !TokenText.endswith("\"")) + return None; + + // A raw string starts with 'R"<delimiter>(' and delimiter is ascii and has + // size at most 16 by the standard, so the first '(' must be among the first + // 19 bytes. + size_t LParenPos = TokenText.substr(0, 19).find_first_of('('); + if (LParenPos == StringRef::npos) + return None; + StringRef Delimiter = TokenText.substr(2, LParenPos - 2); + + // Check that the string ends in ')Delimiter"'. + size_t RParenPos = TokenText.size() - Delimiter.size() - 2; + if (TokenText[RParenPos] != ')') + return None; + if (!TokenText.substr(RParenPos + 1).startswith(Delimiter)) + return None; + return Delimiter; +} + +RawStringFormatStyleManager::RawStringFormatStyleManager( + const FormatStyle &CodeStyle) { + for (const auto &RawStringFormat : CodeStyle.RawStringFormats) { + FormatStyle Style; + if (!getPredefinedStyle(RawStringFormat.BasedOnStyle, + RawStringFormat.Language, &Style)) { + Style = getLLVMStyle(); + Style.Language = RawStringFormat.Language; + } + Style.ColumnLimit = CodeStyle.ColumnLimit; + DelimiterStyle.insert({RawStringFormat.Delimiter, Style}); + } +} + +llvm::Optional<FormatStyle> +RawStringFormatStyleManager::get(StringRef Delimiter) const { + auto It = DelimiterStyle.find(Delimiter); + if (It == DelimiterStyle.end()) + return None; + return It->second; +} + ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style, const AdditionalKeywords &Keywords, const SourceManager &SourceMgr, @@ -85,14 +133,18 @@ ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style, : Style(Style), Keywords(Keywords), SourceMgr(SourceMgr), Whitespaces(Whitespaces), Encoding(Encoding), BinPackInconclusiveFunctions(BinPackInconclusiveFunctions), - CommentPragmasRegex(Style.CommentPragmas) {} + CommentPragmasRegex(Style.CommentPragmas), RawStringFormats(Style) {} LineState ContinuationIndenter::getInitialState(unsigned FirstIndent, + unsigned FirstStartColumn, const AnnotatedLine *Line, bool DryRun) { LineState State; State.FirstIndent = FirstIndent; - State.Column = FirstIndent; + if (FirstStartColumn && Line->First->NewlinesBefore == 0) + State.Column = FirstStartColumn; + else + State.Column = FirstIndent; // With preprocessor directive indentation, the line starts on column 0 // since it's indented after the hash, but FirstIndent is set to the // preprocessor indent. @@ -1216,6 +1268,89 @@ void ContinuationIndenter::moveStateToNewBlock(LineState &State) { State.Stack.back().BreakBeforeParameter = true; } +static unsigned getLastLineEndColumn(StringRef Text, unsigned StartColumn, + unsigned TabWidth, + encoding::Encoding Encoding) { + size_t LastNewlinePos = Text.find_last_of("\n"); + if (LastNewlinePos == StringRef::npos) { + return StartColumn + + encoding::columnWidthWithTabs(Text, StartColumn, TabWidth, Encoding); + } else { + return encoding::columnWidthWithTabs(Text.substr(LastNewlinePos), + /*StartColumn=*/0, TabWidth, Encoding); + } +} + +unsigned ContinuationIndenter::reformatRawStringLiteral( + const FormatToken &Current, unsigned StartColumn, LineState &State, + StringRef Delimiter, const FormatStyle &RawStringStyle, bool DryRun) { + // The text of a raw string is between the leading 'R"delimiter(' and the + // trailing 'delimiter)"'. + unsigned PrefixSize = 3 + Delimiter.size(); + unsigned SuffixSize = 2 + Delimiter.size(); + + // The first start column is the column the raw text starts. + unsigned FirstStartColumn = StartColumn + PrefixSize; + + // The next start column is the intended indentation a line break inside + // the raw string at level 0. It is determined by the following rules: + // - if the content starts on newline, it is one level more than the current + // indent, and + // - if the content does not start on a newline, it is the first start + // column. + // These rules have the advantage that the formatted content both does not + // violate the rectangle rule and visually flows within the surrounding + // source. + bool ContentStartsOnNewline = Current.TokenText[PrefixSize] == '\n'; + unsigned NextStartColumn = ContentStartsOnNewline + ? State.Stack.back().Indent + Style.IndentWidth + : FirstStartColumn; + + // The last start column is the column the raw string suffix starts if it is + // put on a newline. + // The last start column is the intended indentation of the raw string postfix + // if it is put on a newline. It is determined by the following rules: + // - if the raw string prefix starts on a newline, it is the column where + // that raw string prefix starts, and + // - if the raw string prefix does not start on a newline, it is the current + // indent. + unsigned LastStartColumn = Current.NewlinesBefore + ? FirstStartColumn - PrefixSize + : State.Stack.back().Indent; + + std::string RawText = + Current.TokenText.substr(PrefixSize).drop_back(SuffixSize); + + std::pair<tooling::Replacements, unsigned> Fixes = internal::reformat( + RawStringStyle, RawText, {tooling::Range(0, RawText.size())}, + FirstStartColumn, NextStartColumn, LastStartColumn, "<stdin>", + /*FormattingAttemptStatus=*/nullptr); + + auto NewCode = applyAllReplacements(RawText, Fixes.first); + tooling::Replacements NoFixes; + if (!NewCode) { + State.Column += Current.ColumnWidth; + return 0; + } + if (!DryRun) { + SourceLocation OriginLoc = + Current.Tok.getLocation().getLocWithOffset(PrefixSize); + for (const tooling::Replacement &Fix : Fixes.first) { + auto Err = Whitespaces.addReplacement(tooling::Replacement( + SourceMgr, OriginLoc.getLocWithOffset(Fix.getOffset()), + Fix.getLength(), Fix.getReplacementText())); + if (Err) { + llvm::errs() << "Failed to reformat raw string: " + << llvm::toString(std::move(Err)) << "\n"; + } + } + } + unsigned RawLastLineEndColumn = getLastLineEndColumn( + *NewCode, FirstStartColumn, Style.TabWidth, Encoding); + State.Column = RawLastLineEndColumn + SuffixSize; + return Fixes.second; +} + unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current, LineState &State) { if (!Current.IsMultiline) @@ -1238,9 +1373,18 @@ unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current, unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, LineState &State, bool DryRun) { - // Don't break multi-line tokens other than block comments. Instead, just - // update the state. - if (Current.isNot(TT_BlockComment) && Current.IsMultiline) + // Compute the raw string style to use in case this is a raw string literal + // that can be reformatted. + llvm::Optional<StringRef> Delimiter = None; + llvm::Optional<FormatStyle> RawStringStyle = None; + if (Current.isStringLiteral()) + Delimiter = getRawStringDelimiter(Current.TokenText); + if (Delimiter) + RawStringStyle = RawStringFormats.get(*Delimiter); + + // Don't break multi-line tokens other than block comments and raw string + // literals. Instead, just update the state. + if (Current.isNot(TT_BlockComment) && !RawStringStyle && Current.IsMultiline) return addMultilineToken(Current, State); // Don't break implicit string literals or import statements. @@ -1275,6 +1419,11 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, if (Current.IsUnterminatedLiteral) return 0; + if (RawStringStyle) { + RawStringStyle->ColumnLimit = ColumnLimit; + return reformatRawStringLiteral(Current, StartColumn, State, *Delimiter, + *RawStringStyle, DryRun); + } StringRef Text = Current.TokenText; StringRef Prefix; StringRef Postfix; diff --git a/lib/Format/ContinuationIndenter.h b/lib/Format/ContinuationIndenter.h index 631129c1ea..3bc9929b3d 100644 --- a/lib/Format/ContinuationIndenter.h +++ b/lib/Format/ContinuationIndenter.h @@ -20,6 +20,8 @@ #include "FormatToken.h" #include "clang/Format/Format.h" #include "llvm/Support/Regex.h" +#include <map> +#include <tuple> namespace clang { class SourceManager; @@ -30,8 +32,17 @@ class AnnotatedLine; struct FormatToken; struct LineState; struct ParenState; +struct RawStringFormatStyleManager; class WhitespaceManager; +struct RawStringFormatStyleManager { + llvm::StringMap<FormatStyle> DelimiterStyle; + + RawStringFormatStyleManager(const FormatStyle &CodeStyle); + + llvm::Optional<FormatStyle> get(StringRef Delimiter) const; +}; + class ContinuationIndenter { public: /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in @@ -44,9 +55,11 @@ public: bool BinPackInconclusiveFunctions); /// \brief Get the initial state, i.e. the state after placing \p Line's - /// first token at \p FirstIndent. - LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line, - bool DryRun); + /// first token at \p FirstIndent. When reformatting a fragment of code, as in + /// the case of formatting inside raw string literals, \p FirstStartColumn is + /// the column at which the state of the parent formatter is. + LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn, + const AnnotatedLine *Line, bool DryRun); // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a // better home. @@ -88,15 +101,24 @@ private: /// \brief Update 'State' with the next token opening a nested block. void moveStateToNewBlock(LineState &State); + /// \brief Reformats a raw string literal. + /// + /// \returns An extra penalty induced by reformatting the token. + unsigned reformatRawStringLiteral(const FormatToken &Current, + unsigned StartColumn, LineState &State, + StringRef Delimiter, + const FormatStyle &RawStringStyle, + bool DryRun); + /// \brief If the current token sticks out over the end of the line, break /// it if possible. /// /// \returns An extra penalty if a token was broken, otherwise 0. /// - /// The returned penalty will cover the cost of the additional line breaks and - /// column limit violation in all lines except for the last one. The penalty - /// for the column limit violation in the last line (and in single line - /// tokens) is handled in \c addNextStateToQueue. + /// The returned penalty will cover the cost of the additional line breaks + /// and column limit violation in all lines except for the last one. The + /// penalty for the column limit violation in the last line (and in single + /// line tokens) is handled in \c addNextStateToQueue. unsigned breakProtrudingToken(const FormatToken &Current, LineState &State, bool DryRun); @@ -143,6 +165,7 @@ private: encoding::Encoding Encoding; bool BinPackInconclusiveFunctions; llvm::Regex CommentPragmasRegex; + const RawStringFormatStyleManager RawStringFormats; }; struct ParenState { diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index cb0a010512..ecc2bb7b84 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -16,6 +16,7 @@ #include "clang/Format/Format.h" #include "AffectedRangeManager.h" #include "ContinuationIndenter.h" +#include "FormatInternal.h" #include "FormatTokenLexer.h" #include "NamespaceEndCommentsFixer.h" #include "SortJavaScriptImports.h" @@ -44,7 +45,8 @@ using clang::format::FormatStyle; -LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::IncludeCategory) +LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::IncludeCategory); +LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::RawStringFormat); namespace llvm { namespace yaml { @@ -389,6 +391,7 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("PenaltyReturnTypeOnItsOwnLine", Style.PenaltyReturnTypeOnItsOwnLine); IO.mapOptional("PointerAlignment", Style.PointerAlignment); + IO.mapOptional("RawStringFormats", Style.RawStringFormats); IO.mapOptional("ReflowComments", Style.ReflowComments); IO.mapOptional("SortIncludes", Style.SortIncludes); IO.mapOptional("SortUsingDeclarations", Style.SortUsingDeclarations); @@ -441,6 +444,14 @@ template <> struct MappingTraits<FormatStyle::IncludeCategory> { } }; +template <> struct MappingTraits<FormatStyle::RawStringFormat> { + static void mapping(IO &IO, FormatStyle::RawStringFormat &Format) { + IO.mapOptional("Delimiter", Format.Delimiter); + IO.mapOptional("Language", Format.Language); + IO.mapOptional("BasedOnStyle", Format.BasedOnStyle); + } +}; + // Allows to read vector<FormatStyle> while keeping default values. // IO.getContext() should contain a pointer to the FormatStyle structure, that // will be used to get default values for missing keys. @@ -620,6 +631,7 @@ FormatStyle getLLVMStyle() { LLVMStyle.SpacesBeforeTrailingComments = 1; LLVMStyle.Standard = FormatStyle::LS_Cpp11; LLVMStyle.UseTab = FormatStyle::UT_Never; + LLVMStyle.RawStringFormats = {{"pb", FormatStyle::LK_TextProto, "google"}}; LLVMStyle.ReflowComments = true; LLVMStyle.SpacesInParentheses = false; LLVMStyle.SpacesInSquareBrackets = false; @@ -895,7 +907,7 @@ public: JavaScriptRequoter(const Environment &Env, const FormatStyle &Style) : TokenAnalyzer(Env, Style) {} - tooling::Replacements + std::pair<tooling::Replacements, unsigned> analyze(TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, FormatTokenLexer &Tokens) override { @@ -903,7 +915,7 @@ public: AnnotatedLines.end()); tooling::Replacements Result; requoteJSStringLiteral(AnnotatedLines, Result); - return Result; + return {Result, 0}; } private: @@ -984,7 +996,7 @@ public: FormattingAttemptStatus *Status) : TokenAnalyzer(Env, Style), Status(Status) {} - tooling::Replacements + std::pair<tooling::Replacements, unsigned> analyze(TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, FormatTokenLexer &Tokens) override { @@ -1003,13 +1015,20 @@ public: ContinuationIndenter Indenter(Style, Tokens.getKeywords(), Env.getSourceManager(), Whitespaces, Encoding, BinPackInconclusiveFunctions); - UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(), - Env.getSourceManager(), Status) - .format(AnnotatedLines); + unsigned Penalty = + UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, + Tokens.getKeywords(), Env.getSourceManager(), + Status) + .format(AnnotatedLines, /*DryRun=*/false, + /*AdditionalIndent=*/0, + /*FixBadIndentation=*/false, + /*FirstStartColumn=*/Env.getFirstStartColumn(), + /*NextStartColumn=*/Env.getNextStartColumn(), + /*LastStartColumn=*/Env.getLastStartColumn()); for (const auto &R : Whitespaces.generateReplacements()) if (Result.add(R)) - return Result; - return Result; + return {Result, 0}; + return {Result, Penalty}; } private: @@ -1097,7 +1116,7 @@ public: DeletedTokens(FormatTokenLess(Env.getSourceManager())) {} // FIXME: eliminate unused parameters. - tooling::Replacements + std::pair<tooling::Replacements, unsigned> analyze(TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, FormatTokenLexer &Tokens) override { @@ -1125,7 +1144,7 @@ public: } } - return generateFixes(); + return {generateFixes(), 0}; } private: @@ -1906,19 +1925,22 @@ cleanupAroundReplacements(StringRef Code, const tooling::Replacements &Replaces, return processReplacements(Cleanup, Code, NewReplaces, Style); } -tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, - ArrayRef<tooling::Range> Ranges, - StringRef FileName, - FormattingAttemptStatus *Status) { +namespace internal { +std::pair<tooling::Replacements, unsigned> +reformat(const FormatStyle &Style, StringRef Code, + ArrayRef<tooling::Range> Ranges, unsigned FirstStartColumn, + unsigned NextStartColumn, unsigned LastStartColumn, StringRef FileName, + FormattingAttemptStatus *Status) { FormatStyle Expanded = expandPresets(Style); if (Expanded.DisableFormat) - return tooling::Replacements(); + return {tooling::Replacements(), 0}; if (isLikelyXml(Code)) - return tooling::Replacements(); + return {tooling::Replacements(), 0}; if (Expanded.Language == FormatStyle::LK_JavaScript && isMpegTS(Code)) - return tooling::Replacements(); + return {tooling::Replacements(), 0}; - typedef std::function<tooling::Replacements(const Environment &)> + typedef std::function<std::pair<tooling::Replacements, unsigned>( + const Environment &)> AnalyzerPass; SmallVector<AnalyzerPass, 4> Passes; @@ -1944,26 +1966,42 @@ tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, return Formatter(Env, Expanded, Status).process(); }); - std::unique_ptr<Environment> Env = - Environment::CreateVirtualEnvironment(Code, FileName, Ranges); + std::unique_ptr<Environment> Env = Environment::CreateVirtualEnvironment( + Code, FileName, Ranges, FirstStartColumn, NextStartColumn, + LastStartColumn); llvm::Optional<std::string> CurrentCode = None; tooling::Replacements Fixes; + unsigned Penalty = 0; for (size_t I = 0, E = Passes.size(); I < E; ++I) { - tooling::Replacements PassFixes = Passes[I](*Env); + std::pair<tooling::Replacements, unsigned> PassFixes = Passes[I](*Env); auto NewCode = applyAllReplacements( - CurrentCode ? StringRef(*CurrentCode) : Code, PassFixes); + CurrentCode ? StringRef(*CurrentCode) : Code, PassFixes.first); if (NewCode) { - Fixes = Fixes.merge(PassFixes); + Fixes = Fixes.merge(PassFixes.first); + Penalty += PassFixes.second; if (I + 1 < E) { CurrentCode = std::move(*NewCode); Env = Environment::CreateVirtualEnvironment( *CurrentCode, FileName, - tooling::calculateRangesAfterReplacements(Fixes, Ranges)); + tooling::calculateRangesAfterReplacements(Fixes, Ranges), + FirstStartColumn, NextStartColumn, LastStartColumn); } } } - return Fixes; + return {Fixes, Penalty}; +} +} // namespace internal + +tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, + ArrayRef<tooling::Range> Ranges, + StringRef FileName, + FormattingAttemptStatus *Status) { + return internal::reformat(Style, Code, Ranges, + /*FirstStartColumn=*/0, + /*NextStartColumn=*/0, + /*LastStartColumn=*/0, FileName, Status) + .first; } tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code, @@ -1975,7 +2013,7 @@ tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code, std::unique_ptr<Environment> Env = Environment::CreateVirtualEnvironment(Code, FileName, Ranges); Cleaner Clean(*Env, Style); - return Clean.process(); + return Clean.process().first; } tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, @@ -1995,7 +2033,7 @@ tooling::Replacements fixNamespaceEndComments(const FormatStyle &Style, std::unique_ptr<Environment> Env = Environment::CreateVirtualEnvironment(Code, FileName, Ranges); NamespaceEndCommentsFixer Fix(*Env, Style); - return Fix.process(); + return Fix.process().first; } tooling::Replacements sortUsingDeclarations(const FormatStyle &Style, @@ -2005,7 +2043,7 @@ tooling::Replacements sortUsingDeclarations(const FormatStyle &Style, std::unique_ptr<Environment> Env = Environment::CreateVirtualEnvironment(Code, FileName, Ranges); UsingDeclarationsSorter Sorter(*Env, Style); - return Sorter.process(); + return Sorter.process().first; } LangOptions getFormattingLangOpts(const FormatStyle &Style) { diff --git a/lib/Format/FormatInternal.h b/lib/Format/FormatInternal.h new file mode 100644 index 0000000000..1373e9d2ee --- /dev/null +++ b/lib/Format/FormatInternal.h @@ -0,0 +1,79 @@ +//===--- FormatInternal.h - Format C++ code ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file declares Format APIs to be used internally by the +/// formatting library implementation. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_FORMAT_FORMATINTERNAL_H +#define LLVM_CLANG_LIB_FORMAT_FORMATINTERNAL_H + +namespace clang { +namespace format { +namespace internal { + +/// \brief Reformats the given \p Ranges in the code fragment \p Code. +/// +/// A fragment of code could conceptually be surrounded by other code that might +/// constrain how that fragment is laid out. +/// For example, consider the fragment of code between 'R"(' and ')"', +/// exclusive, in the following code: +/// +/// void outer(int x) { +/// string inner = R"(name: data +/// ^ FirstStartColumn +/// value: { +/// x: 1 +/// ^ NextStartColumn +/// } +/// )"; +/// ^ LastStartColumn +/// } +/// +/// The outer code can influence the inner fragment as follows: +/// * \p FirstStartColumn specifies the column at which \p Code starts. +/// * \p NextStartColumn specifies the additional indent dictated by the +/// surrounding code. It is applied to the rest of the lines of \p Code. +/// * \p LastStartColumn specifies the column at which the last line of +/// \p Code should end, in case the last line is an empty line. +/// +/// In the case where the last line of the fragment contains content, +/// the fragment ends at the end of that content and \p LastStartColumn is +/// not taken into account, for example in: +/// +/// void block() { +/// string inner = R"(name: value)"; +/// } +/// +/// Each range is extended on either end to its next bigger logic unit, i.e. +/// everything that might influence its formatting or might be influenced by its +/// formatting. +/// +/// Returns a pair P, where: +/// * P.first are the ``Replacements`` necessary to make all \p Ranges comply +/// with \p Style. +/// * P.second is the penalty induced by formatting the fragment \p Code. +/// If the formatting of the fragment doesn't have a notion of penalty, +/// returns 0. +/// +/// If ``Status`` is non-null, its value will be populated with the status of +/// this formatting attempt. See \c FormattingAttemptStatus. +std::pair<tooling::Replacements, unsigned> +reformat(const FormatStyle &Style, StringRef Code, + ArrayRef<tooling::Range> Ranges, unsigned FirstStartColumn, + unsigned NextStartColumn, unsigned LastStartColumn, StringRef FileName, + FormattingAttemptStatus *Status); + +} // namespace internal +} // namespace format +} // namespace clang + +#endif diff --git a/lib/Format/FormatTokenLexer.cpp b/lib/Format/FormatTokenLexer.cpp index 8fb3d84ea5..d37fcec6c5 100644 --- a/lib/Format/FormatTokenLexer.cpp +++ b/lib/Format/FormatTokenLexer.cpp @@ -24,10 +24,10 @@ namespace clang { namespace format { FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, - const FormatStyle &Style, + unsigned Column, const FormatStyle &Style, encoding::Encoding Encoding) : FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}), - Column(0), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID), + Column(Column), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID), Style(Style), IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin), diff --git a/lib/Format/FormatTokenLexer.h b/lib/Format/FormatTokenLexer.h index bf10f09cd1..6605a1c35a 100644 --- a/lib/Format/FormatTokenLexer.h +++ b/lib/Format/FormatTokenLexer.h @@ -36,7 +36,7 @@ enum LexerState { class FormatTokenLexer { public: - FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, + FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column, const FormatStyle &Style, encoding::Encoding Encoding); ArrayRef<FormatToken *> lex(); diff --git a/lib/Format/NamespaceEndCommentsFixer.cpp b/lib/Format/NamespaceEndCommentsFixer.cpp index c660843dca..df99bb2e13 100644 --- a/lib/Format/NamespaceEndCommentsFixer.cpp +++ b/lib/Format/NamespaceEndCommentsFixer.cpp @@ -137,7 +137,7 @@ NamespaceEndCommentsFixer::NamespaceEndCommentsFixer(const Environment &Env, const FormatStyle &Style) : TokenAnalyzer(Env, Style) {} -tooling::Replacements NamespaceEndCommentsFixer::analyze( +std::pair<tooling::Replacements, unsigned> NamespaceEndCommentsFixer::analyze( TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, FormatTokenLexer &Tokens) { const SourceManager &SourceMgr = Env.getSourceManager(); @@ -206,7 +206,7 @@ tooling::Replacements NamespaceEndCommentsFixer::analyze( } StartLineIndex = SIZE_MAX; } - return Fixes; + return {Fixes, 0}; } } // namespace format diff --git a/lib/Format/NamespaceEndCommentsFixer.h b/lib/Format/NamespaceEndCommentsFixer.h index 7790668a2e..4779f0d27c 100644 --- a/lib/Format/NamespaceEndCommentsFixer.h +++ b/lib/Format/NamespaceEndCommentsFixer.h @@ -25,7 +25,7 @@ class NamespaceEndCommentsFixer : public TokenAnalyzer { public: NamespaceEndCommentsFixer(const Environment &Env, const FormatStyle &Style); - tooling::Replacements + std::pair<tooling::Replacements, unsigned> analyze(TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, FormatTokenLexer &Tokens) override; diff --git a/lib/Format/SortJavaScriptImports.cpp b/lib/Format/SortJavaScriptImports.cpp index c4db9a6c2f..d0b979e100 100644 --- a/lib/Format/SortJavaScriptImports.cpp +++ b/lib/Format/SortJavaScriptImports.cpp @@ -123,7 +123,7 @@ public: : TokenAnalyzer(Env, Style), FileContents(Env.getSourceManager().getBufferData(Env.getFileID())) {} - tooling::Replacements + std::pair<tooling::Replacements, unsigned> analyze(TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, FormatTokenLexer &Tokens) override { @@ -138,7 +138,7 @@ public: parseModuleReferences(Keywords, AnnotatedLines); if (References.empty()) - return Result; + return {Result, 0}; SmallVector<unsigned, 16> Indices; for (unsigned i = 0, e = References.size(); i != e; ++i) @@ -168,7 +168,7 @@ public: } if (ReferencesInOrder && SymbolsInOrder) - return Result; + return {Result, 0}; SourceRange InsertionPoint = References[0].Range; InsertionPoint.setEnd(References[References.size() - 1].Range.getEnd()); @@ -202,7 +202,7 @@ public: assert(false); } - return Result; + return {Result, 0}; } private: @@ -449,7 +449,7 @@ tooling::Replacements sortJavaScriptImports(const FormatStyle &Style, std::unique_ptr<Environment> Env = Environment::CreateVirtualEnvironment(Code, FileName, Ranges); JavaScriptImportSorter Sorter(*Env, Style); - return Sorter.process(); + return Sorter.process().first; } } // end namespace format diff --git a/lib/Format/TokenAnalyzer.cpp b/lib/Format/TokenAnalyzer.cpp index 7a8b70c4ba..d1dfb1fea3 100644 --- a/lib/Format/TokenAnalyzer.cpp +++ b/lib/Format/TokenAnalyzer.cpp @@ -38,7 +38,10 @@ namespace format { // Code. std::unique_ptr<Environment> Environment::CreateVirtualEnvironment(StringRef Code, StringRef FileName, - ArrayRef<tooling::Range> Ranges) { + ArrayRef<tooling::Range> Ranges, + unsigned FirstStartColumn, + unsigned NextStartColumn, + unsigned LastStartColumn) { // This is referenced by `FileMgr` and will be released by `FileMgr` when it // is deleted. IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem( @@ -70,9 +73,9 @@ Environment::CreateVirtualEnvironment(StringRef Code, StringRef FileName, SourceLocation End = Start.getLocWithOffset(Range.getLength()); CharRanges.push_back(CharSourceRange::getCharRange(Start, End)); } - return llvm::make_unique<Environment>(ID, std::move(FileMgr), - std::move(VirtualSM), - std::move(Diagnostics), CharRanges); + return llvm::make_unique<Environment>( + ID, std::move(FileMgr), std::move(VirtualSM), std::move(Diagnostics), + CharRanges, FirstStartColumn, NextStartColumn, LastStartColumn); } TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style) @@ -89,14 +92,16 @@ TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style) << "\n"); } -tooling::Replacements TokenAnalyzer::process() { +std::pair<tooling::Replacements, unsigned> TokenAnalyzer::process() { tooling::Replacements Result; - FormatTokenLexer Tokens(Env.getSourceManager(), Env.getFileID(), Style, - Encoding); + FormatTokenLexer Tokens(Env.getSourceManager(), Env.getFileID(), + Env.getFirstStartColumn(), Style, Encoding); - UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(), *this); + UnwrappedLineParser Parser(Style, Tokens.getKeywords(), + Env.getFirstStartColumn(), Tokens.lex(), *this); Parser.parse(); assert(UnwrappedLines.rbegin()->empty()); + unsigned Penalty = 0; for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; ++Run) { DEBUG(llvm::dbgs() << "Run " << Run << "...\n"); SmallVector<AnnotatedLine *, 16> AnnotatedLines; @@ -107,13 +112,13 @@ tooling::Replacements TokenAnalyzer::process() { Annotator.annotate(*AnnotatedLines.back()); } - tooling::Replacements RunResult = + std::pair<tooling::Replacements, unsigned> RunResult = analyze(Annotator, AnnotatedLines, Tokens); DEBUG({ llvm::dbgs() << "Replacements for run " << Run << ":\n"; - for (tooling::Replacements::const_iterator I = RunResult.begin(), - E = RunResult.end(); + for (tooling::Replacements::const_iterator I = RunResult.first.begin(), + E = RunResult.first.end(); I != E; ++I) { llvm::dbgs() << I->toString() << "\n"; } @@ -121,17 +126,19 @@ tooling::Replacements TokenAnalyzer::process() { for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { delete AnnotatedLines[i]; } - for (const auto &R : RunResult) { + + Penalty += RunResult.second; + for (const auto &R : RunResult.first) { auto Err = Result.add(R); // FIXME: better error handling here. For now, simply return an empty // Replacements to indicate failure. if (Err) { llvm::errs() << llvm::toString(std::move(Err)) << "\n"; - return tooling::Replacements(); + return {tooling::Replacements(), 0}; } } } - return Result; + return {Result, Penalty}; } void TokenAnalyzer::consumeUnwrappedLine(const UnwrappedLine &TheLine) { diff --git a/lib/Format/TokenAnalyzer.h b/lib/Format/TokenAnalyzer.h index 78a3d1bc8d..96ea00b25b 100644 --- a/lib/Format/TokenAnalyzer.h +++ b/lib/Format/TokenAnalyzer.h @@ -37,21 +37,37 @@ namespace format { class Environment { public: Environment(SourceManager &SM, FileID ID, ArrayRef<CharSourceRange> Ranges) - : ID(ID), CharRanges(Ranges.begin(), Ranges.end()), SM(SM) {} + : ID(ID), CharRanges(Ranges.begin(), Ranges.end()), SM(SM), + FirstStartColumn(0), + NextStartColumn(0), + LastStartColumn(0) {} Environment(FileID ID, std::unique_ptr<FileManager> FileMgr, std::unique_ptr<SourceManager> VirtualSM, std::unique_ptr<DiagnosticsEngine> Diagnostics, - const std::vector<CharSourceRange> &CharRanges) + const std::vector<CharSourceRange> &CharRanges, + unsigned FirstStartColumn, + unsigned NextStartColumn, + unsigned LastStartColumn) : ID(ID), CharRanges(CharRanges.begin(), CharRanges.end()), - SM(*VirtualSM), FileMgr(std::move(FileMgr)), + SM(*VirtualSM), + FirstStartColumn(FirstStartColumn), + NextStartColumn(NextStartColumn), + LastStartColumn(LastStartColumn), + FileMgr(std::move(FileMgr)), VirtualSM(std::move(VirtualSM)), Diagnostics(std::move(Diagnostics)) {} - // This sets up an virtual file system with file \p FileName containing \p - // Code. + // This sets up an virtual file system with file \p FileName containing the + // fragment \p Code. Assumes that \p Code starts at \p FirstStartColumn, + // that the next lines of \p Code should start at \p NextStartColumn, and + // that \p Code should end at \p LastStartColumn if it ends in newline. + // See also the documentation of clang::format::internal::reformat. static std::unique_ptr<Environment> CreateVirtualEnvironment(StringRef Code, StringRef FileName, - ArrayRef<tooling::Range> Ranges); + ArrayRef<tooling::Range> Ranges, + unsigned FirstStartColumn = 0, + unsigned NextStartColumn = 0, + unsigned LastStartColumn = 0); FileID getFileID() const { return ID; } @@ -59,10 +75,25 @@ public: const SourceManager &getSourceManager() const { return SM; } + // Returns the column at which the fragment of code managed by this + // environment starts. + unsigned getFirstStartColumn() const { return FirstStartColumn; } + + // Returns the column at which subsequent lines of the fragment of code + // managed by this environment should start. + unsigned getNextStartColumn() const { return NextStartColumn; } + + // Returns the column at which the fragment of code managed by this + // environment should end if it ends in a newline. + unsigned getLastStartColumn() const { return LastStartColumn; } + private: FileID ID; SmallVector<CharSourceRange, 8> CharRanges; SourceManager &SM; + unsigned FirstStartColumn; + unsigned NextStartColumn; + unsigned LastStartColumn; // The order of these fields are important - they should be in the same order // as they are created in `CreateVirtualEnvironment` so that they can be @@ -76,10 +107,10 @@ class TokenAnalyzer : public UnwrappedLineConsumer { public: TokenAnalyzer(const Environment &Env, const FormatStyle &Style); - tooling::Replacements process(); + std::pair<tooling::Replacements, unsigned> process(); protected: - virtual tooling::Replacements + virtual std::pair<tooling::Replacements, unsigned> analyze(TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, FormatTokenLexer &Tokens) = 0; diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp index a49cbaab5f..1b22e26600 100644 --- a/lib/Format/TokenAnnotator.cpp +++ b/lib/Format/TokenAnnotator.cpp @@ -1891,7 +1891,8 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { } Line.First->TotalLength = - Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth; + Line.First->IsMultiline ? Style.ColumnLimit + : Line.FirstStartColumn + Line.First->ColumnWidth; FormatToken *Current = Line.First->Next; bool InFunctionDecl = Line.MightBeFunctionDecl; while (Current) { diff --git a/lib/Format/TokenAnnotator.h b/lib/Format/TokenAnnotator.h index 805509533b..04a18d45b8 100644 --- a/lib/Format/TokenAnnotator.h +++ b/lib/Format/TokenAnnotator.h @@ -43,7 +43,8 @@ public: InPPDirective(Line.InPPDirective), MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false), IsMultiVariableDeclStmt(false), Affected(false), - LeadingEmptyLinesAffected(false), ChildrenAffected(false) { + LeadingEmptyLinesAffected(false), ChildrenAffected(false), + FirstStartColumn(Line.FirstStartColumn) { assert(!Line.Tokens.empty()); // Calculate Next and Previous for all tokens. Note that we must overwrite @@ -127,6 +128,8 @@ public: /// \c True if one of this line's children intersects with an input range. bool ChildrenAffected; + unsigned FirstStartColumn; + private: // Disallow copying. AnnotatedLine(const AnnotatedLine &) = delete; diff --git a/lib/Format/UnwrappedLineFormatter.cpp b/lib/Format/UnwrappedLineFormatter.cpp index d25f0a1c29..a82cd5abe2 100644 --- a/lib/Format/UnwrappedLineFormatter.cpp +++ b/lib/Format/UnwrappedLineFormatter.cpp @@ -659,7 +659,9 @@ public: /// \brief Formats an \c AnnotatedLine and returns the penalty. /// /// If \p DryRun is \c false, directly applies the changes. - virtual unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, + virtual unsigned formatLine(const AnnotatedLine &Line, + unsigned FirstIndent, + unsigned FirstStartColumn, bool DryRun) = 0; protected: @@ -730,7 +732,8 @@ protected: *Child->First, /*Newlines=*/0, /*Spaces=*/1, /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective); } - Penalty += formatLine(*Child, State.Column + 1, DryRun); + Penalty += + formatLine(*Child, State.Column + 1, /*FirstStartColumn=*/0, DryRun); State.Column += 1 + Child->Last->TotalLength; return true; @@ -756,10 +759,10 @@ public: /// \brief Formats the line, simply keeping all of the input's line breaking /// decisions. unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, - bool DryRun) override { + unsigned FirstStartColumn, bool DryRun) override { assert(!DryRun); - LineState State = - Indenter->getInitialState(FirstIndent, &Line, /*DryRun=*/false); + LineState State = Indenter->getInitialState(FirstIndent, FirstStartColumn, + &Line, /*DryRun=*/false); while (State.NextToken) { bool Newline = Indenter->mustBreak(State) || @@ -782,9 +785,10 @@ public: /// \brief Puts all tokens into a single line. unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, - bool DryRun) override { + unsigned FirstStartColumn, bool DryRun) override { unsigned Penalty = 0; - LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun); + LineState State = + Indenter->getInitialState(FirstIndent, FirstStartColumn, &Line, DryRun); while (State.NextToken) { formatChildren(State, /*Newline=*/false, DryRun, Penalty); Indenter->addTokenToState( @@ -806,8 +810,9 @@ public: /// \brief Formats the line by finding the best line breaks with line lengths /// below the column limit. unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, - bool DryRun) override { - LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun); + unsigned FirstStartColumn, bool DryRun) override { + LineState State = + Indenter->getInitialState(FirstIndent, FirstStartColumn, &Line, DryRun); // If the ObjC method declaration does not fit on a line, we should format // it with one arg per line. @@ -974,7 +979,10 @@ private: unsigned UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun, int AdditionalIndent, - bool FixBadIndentation) { + bool FixBadIndentation, + unsigned FirstStartColumn, + unsigned NextStartColumn, + unsigned LastStartColumn) { LineJoiner Joiner(Style, Keywords, Lines); // Try to look up already computed penalty in DryRun-mode. @@ -994,9 +1002,10 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, // The minimum level of consecutive lines that have been formatted. unsigned RangeMinLevel = UINT_MAX; + bool FirstLine = true; for (const AnnotatedLine *Line = Joiner.getNextMergedLine(DryRun, IndentTracker); - Line; Line = NextLine) { + Line; Line = NextLine, FirstLine = false) { const AnnotatedLine &TheLine = *Line; unsigned Indent = IndentTracker.getIndent(); @@ -1020,8 +1029,12 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, } if (ShouldFormat && TheLine.Type != LT_Invalid) { - if (!DryRun) - formatFirstToken(TheLine, PreviousLine, Indent); + if (!DryRun) { + bool LastLine = Line->First->is(tok::eof); + formatFirstToken(TheLine, PreviousLine, + Indent, + LastLine ? LastStartColumn : NextStartColumn + Indent); + } NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker); unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine); @@ -1030,16 +1043,18 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, (TheLine.Type == LT_ImportStatement && (Style.Language != FormatStyle::LK_JavaScript || !Style.JavaScriptWrapImports)); - if (Style.ColumnLimit == 0) NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this) - .formatLine(TheLine, Indent, DryRun); + .formatLine(TheLine, NextStartColumn + Indent, + FirstLine ? FirstStartColumn : 0, DryRun); else if (FitsIntoOneLine) Penalty += NoLineBreakFormatter(Indenter, Whitespaces, Style, this) - .formatLine(TheLine, Indent, DryRun); + .formatLine(TheLine, NextStartColumn + Indent, + FirstLine ? FirstStartColumn : 0, DryRun); else Penalty += OptimizingLineFormatter(Indenter, Whitespaces, Style, this) - .formatLine(TheLine, Indent, DryRun); + .formatLine(TheLine, NextStartColumn + Indent, + FirstLine ? FirstStartColumn : 0, DryRun); RangeMinLevel = std::min(RangeMinLevel, TheLine.Level); } else { // If no token in the current line is affected, we still need to format @@ -1062,6 +1077,7 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, // Format the first token. if (ReformatLeadingWhitespace) formatFirstToken(TheLine, PreviousLine, + TheLine.First->OriginalColumn, TheLine.First->OriginalColumn); else Whitespaces->addUntouchableToken(*TheLine.First, @@ -1084,12 +1100,14 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, void UnwrappedLineFormatter::formatFirstToken(const AnnotatedLine &Line, const AnnotatedLine *PreviousLine, - unsigned Indent) { + unsigned Indent, + unsigned NewlineIndent) { FormatToken &RootToken = *Line.First; if (RootToken.is(tok::eof)) { unsigned Newlines = std::min(RootToken.NewlinesBefore, 1u); - Whitespaces->replaceWhitespace(RootToken, Newlines, /*Spaces=*/0, - /*StartOfTokenColumn=*/0); + unsigned TokenIndent = Newlines ? NewlineIndent : 0; + Whitespaces->replaceWhitespace(RootToken, Newlines, TokenIndent, + TokenIndent); return; } unsigned Newlines = @@ -1104,10 +1122,6 @@ void UnwrappedLineFormatter::formatFirstToken(const AnnotatedLine &Line, if (RootToken.IsFirst && !RootToken.HasUnescapedNewline) Newlines = 0; - // Preprocessor directives get indented after the hash, if indented. - if (Line.Type == LT_PreprocessorDirective || Line.Type == LT_ImportStatement) - Indent = 0; - // Remove empty lines after "{". if (!Style.KeepEmptyLinesAtTheStartOfBlocks && PreviousLine && PreviousLine->Last->is(tok::l_brace) && @@ -1125,6 +1139,13 @@ void UnwrappedLineFormatter::formatFirstToken(const AnnotatedLine &Line, (!PreviousLine->InPPDirective || !RootToken.HasUnescapedNewline)) Newlines = std::min(1u, Newlines); + if (Newlines) + Indent = NewlineIndent; + + // Preprocessor directives get indented after the hash, if indented. + if (Line.Type == LT_PreprocessorDirective || Line.Type == LT_ImportStatement) + Indent = 0; + Whitespaces->replaceWhitespace(RootToken, Newlines, Indent, Indent, Line.InPPDirective && !RootToken.HasUnescapedNewline); diff --git a/lib/Format/UnwrappedLineFormatter.h b/lib/Format/UnwrappedLineFormatter.h index 9d7a910b98..6432ca83a4 100644 --- a/lib/Format/UnwrappedLineFormatter.h +++ b/lib/Format/UnwrappedLineFormatter.h @@ -40,13 +40,17 @@ public: /// \brief Format the current block and return the penalty. unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun = false, int AdditionalIndent = 0, - bool FixBadIndentation = false); + bool FixBadIndentation = false, + unsigned FirstStartColumn = 0, + unsigned NextStartColumn = 0, + unsigned LastStartColumn = 0); private: /// \brief Add a new line and the required indent before the first Token /// of the \c UnwrappedLine if there was no structural parsing error. void formatFirstToken(const AnnotatedLine &Line, - const AnnotatedLine *PreviousLine, unsigned Indent); + const AnnotatedLine *PreviousLine, unsigned Indent, + unsigned NewlineIndent); /// \brief Returns the column limit for a line, taking into account whether we /// need an escaped newline due to a continued preprocessor directive. diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp index e210beb20e..9243cd99cb 100644 --- a/lib/Format/UnwrappedLineParser.cpp +++ b/lib/Format/UnwrappedLineParser.cpp @@ -225,6 +225,7 @@ private: UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, + unsigned FirstStartColumn, ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback) : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), @@ -232,7 +233,7 @@ UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), IfNdefCondition(nullptr), FoundIncludeGuardStart(false), - IncludeGuardRejected(false) {} + IncludeGuardRejected(false), FirstStartColumn(FirstStartColumn) {} void UnwrappedLineParser::reset() { PPBranchLevel = -1; @@ -247,10 +248,12 @@ void UnwrappedLineParser::reset() { CurrentLines = &Lines; DeclarationScopeStack.clear(); PPStack.clear(); + Line->FirstStartColumn = FirstStartColumn; } void UnwrappedLineParser::parse() { IndexedTokenSource TokenSource(AllTokens); + Line->FirstStartColumn = FirstStartColumn; do { DEBUG(llvm::dbgs() << "----\n"); reset(); @@ -2193,7 +2196,8 @@ void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, StringRef Prefix = "") { - llvm::dbgs() << Prefix << "Line(" << Line.Level << ")" + llvm::dbgs() << Prefix << "Line(" << Line.Level + << ", FSC=" << Line.FirstStartColumn << ")" << (Line.InPPDirective ? " MACRO" : "") << ": "; for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), E = Line.Tokens.end(); @@ -2226,6 +2230,7 @@ void UnwrappedLineParser::addUnwrappedLine() { CurrentLines->push_back(std::move(*Line)); Line->Tokens.clear(); Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; + Line->FirstStartColumn = 0; if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { CurrentLines->append( std::make_move_iterator(PreprocessorDirectives.begin()), diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h index 4437ccf28d..1d8ccabbd0 100644 --- a/lib/Format/UnwrappedLineParser.h +++ b/lib/Format/UnwrappedLineParser.h @@ -56,6 +56,8 @@ struct UnwrappedLine { size_t MatchingOpeningBlockLineIndex; static const size_t kInvalidIndex = -1; + + unsigned FirstStartColumn = 0; }; class UnwrappedLineConsumer { @@ -71,6 +73,7 @@ class UnwrappedLineParser { public: UnwrappedLineParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, + unsigned FirstStartColumn, ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback); @@ -249,6 +252,10 @@ private: FormatToken *IfNdefCondition; bool FoundIncludeGuardStart; bool IncludeGuardRejected; + // Contains the first start column where the source begins. This is zero for + // normal source code and may be nonzero when formatting a code fragment that + // does not start at the beginning of the file. + unsigned FirstStartColumn; friend class ScopedLineState; friend class CompoundStatementIndenter; diff --git a/lib/Format/UsingDeclarationsSorter.cpp b/lib/Format/UsingDeclarationsSorter.cpp index 4d60d8fd9a..d6753b545e 100644 --- a/lib/Format/UsingDeclarationsSorter.cpp +++ b/lib/Format/UsingDeclarationsSorter.cpp @@ -124,7 +124,7 @@ UsingDeclarationsSorter::UsingDeclarationsSorter(const Environment &Env, const FormatStyle &Style) : TokenAnalyzer(Env, Style) {} -tooling::Replacements UsingDeclarationsSorter::analyze( +std::pair<tooling::Replacements, unsigned> UsingDeclarationsSorter::analyze( TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, FormatTokenLexer &Tokens) { const SourceManager &SourceMgr = Env.getSourceManager(); @@ -149,7 +149,7 @@ tooling::Replacements UsingDeclarationsSorter::analyze( UsingDeclarations.push_back(UsingDeclaration(AnnotatedLines[I], Label)); } endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes); - return Fixes; + return {Fixes, 0}; } } // namespace format diff --git a/lib/Format/UsingDeclarationsSorter.h b/lib/Format/UsingDeclarationsSorter.h index f7d5f97e3a..6f137712d8 100644 --- a/lib/Format/UsingDeclarationsSorter.h +++ b/lib/Format/UsingDeclarationsSorter.h @@ -25,7 +25,7 @@ class UsingDeclarationsSorter : public TokenAnalyzer { public: UsingDeclarationsSorter(const Environment &Env, const FormatStyle &Style); - tooling::Replacements + std::pair<tooling::Replacements, unsigned> analyze(TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, FormatTokenLexer &Tokens) override; diff --git a/lib/Format/WhitespaceManager.cpp b/lib/Format/WhitespaceManager.cpp index 0f2732c9fc..a5477a9963 100644 --- a/lib/Format/WhitespaceManager.cpp +++ b/lib/Format/WhitespaceManager.cpp @@ -67,6 +67,11 @@ void WhitespaceManager::addUntouchableToken(const FormatToken &Tok, /*IsInsideToken=*/false)); } +llvm::Error +WhitespaceManager::addReplacement(const tooling::Replacement &Replacement) { + return Replaces.add(Replacement); +} + void WhitespaceManager::replaceWhitespaceInToken( const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars, StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective, diff --git a/lib/Format/WhitespaceManager.h b/lib/Format/WhitespaceManager.h index 98a2071cf4..af20dc5616 100644 --- a/lib/Format/WhitespaceManager.h +++ b/lib/Format/WhitespaceManager.h @@ -57,6 +57,8 @@ public: /// was not called. void addUntouchableToken(const FormatToken &Tok, bool InPPDirective); + llvm::Error addReplacement(const tooling::Replacement &Replacement); + /// \brief Inserts or replaces whitespace in the middle of a token. /// /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix |