summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSam McCall <sam.mccall@gmail.com>2021-01-12 17:18:35 +0100
committerSam McCall <sam.mccall@gmail.com>2021-01-13 17:29:30 +0100
commit66d5994bd38a9be4a0c05de2b69f88b64e6845ce (patch)
tree1d87dc6161f95ff483ec879af4bab0b0aad72bb2
parent4fe17ada55ade9b77e18521dae0985cb4a88f6c4 (diff)
downloadllvm-66d5994bd38a9be4a0c05de2b69f88b64e6845ce.tar.gz
[clangd] Explicitly avoid background-indexing the same file twice.
This used to implicitly never happen due to only discovering each CDB once. We may want to carefully support reindexing one day, but we need to do it carefully (tricky tradeoffs) and it would need further support in background indexer. Making this explicit here rather than just turning off rebroadcast in background index for a few reasons: - allows *new* files in the same CDB to be indexed - relying on bugs-at-a-distance cancelling each other out is bound to bite us - gets us closer to actually supporting reindexing, which requires similar tracking Differential Revision: https://reviews.llvm.org/D94503
-rw-r--r--clang-tools-extra/clangd/index/Background.cpp7
-rw-r--r--clang-tools-extra/clangd/index/Background.h4
-rw-r--r--clang-tools-extra/clangd/index/BackgroundQueue.cpp26
-rw-r--r--clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp76
4 files changed, 91 insertions, 22 deletions
diff --git a/clang-tools-extra/clangd/index/Background.cpp b/clang-tools-extra/clangd/index/Background.cpp
index 1649bffea2ed..e4ce1f57ff2f 100644
--- a/clang-tools-extra/clangd/index/Background.cpp
+++ b/clang-tools-extra/clangd/index/Background.cpp
@@ -43,6 +43,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Threading.h"
+#include "llvm/Support/xxhash.h"
#include <algorithm>
#include <atomic>
@@ -139,8 +140,8 @@ BackgroundQueue::Task BackgroundIndex::changedFilesTask(
std::mt19937(std::random_device{}()));
std::vector<BackgroundQueue::Task> Tasks;
Tasks.reserve(NeedsReIndexing.size());
- for (auto &Cmd : NeedsReIndexing)
- Tasks.push_back(indexFileTask(std::move(Cmd)));
+ for (const auto &File : NeedsReIndexing)
+ Tasks.push_back(indexFileTask(std::move(File)));
Queue.append(std::move(Tasks));
});
@@ -156,6 +157,7 @@ static llvm::StringRef filenameWithoutExtension(llvm::StringRef Path) {
BackgroundQueue::Task BackgroundIndex::indexFileTask(std::string Path) {
std::string Tag = filenameWithoutExtension(Path).str();
+ uint64_t Key = llvm::xxHash64(Path);
BackgroundQueue::Task T([this, Path(std::move(Path))] {
llvm::Optional<WithContext> WithProvidedContext;
if (ContextProvider)
@@ -168,6 +170,7 @@ BackgroundQueue::Task BackgroundIndex::indexFileTask(std::string Path) {
});
T.QueuePri = IndexFile;
T.Tag = std::move(Tag);
+ T.Key = Key;
return T;
}
diff --git a/clang-tools-extra/clangd/index/Background.h b/clang-tools-extra/clangd/index/Background.h
index e8f9468889f2..fbcec7014957 100644
--- a/clang-tools-extra/clangd/index/Background.h
+++ b/clang-tools-extra/clangd/index/Background.h
@@ -76,6 +76,8 @@ public:
llvm::ThreadPriority ThreadPri = llvm::ThreadPriority::Background;
unsigned QueuePri = 0; // Higher-priority tasks will run first.
std::string Tag; // Allows priority to be boosted later.
+ uint64_t Key = 0; // If the key matches a previous task, drop this one.
+ // (in practice this means we never reindex a file).
bool operator<(const Task &O) const { return QueuePri < O.QueuePri; }
};
@@ -114,6 +116,7 @@ public:
private:
void notifyProgress() const; // Requires lock Mu
+ bool adjust(Task &T);
std::mutex Mu;
Stats Stat;
@@ -122,6 +125,7 @@ private:
std::vector<Task> Queue; // max-heap
llvm::StringMap<unsigned> Boosts;
std::function<void(Stats)> OnProgress;
+ llvm::DenseSet<uint64_t> SeenKeys;
};
// Builds an in-memory index by by running the static indexer action over
diff --git a/clang-tools-extra/clangd/index/BackgroundQueue.cpp b/clang-tools-extra/clangd/index/BackgroundQueue.cpp
index 3262a2f46d38..b0dc2acca356 100644
--- a/clang-tools-extra/clangd/index/BackgroundQueue.cpp
+++ b/clang-tools-extra/clangd/index/BackgroundQueue.cpp
@@ -72,10 +72,24 @@ void BackgroundQueue::stop() {
CV.notify_all();
}
+// Tweaks the priority of a newly-enqueued task, or returns false to cancel it.
+bool BackgroundQueue::adjust(Task &T) {
+ // It is tempting to drop duplicates of queued tasks, and merely deprioritize
+ // duplicates of completed tasks (i.e. reindexing on CDB changes). But:
+ // - the background indexer doesn't support reindexing well, e.g. staleness
+ // is checked at *enqueue* time only, and doesn't account for compile flags
+ // - reindexing on compile flags is often a poor use of CPU in practice
+ if (T.Key && !SeenKeys.insert(T.Key).second)
+ return false;
+ T.QueuePri = std::max(T.QueuePri, Boosts.lookup(T.Tag));
+ return true;
+}
+
void BackgroundQueue::push(Task T) {
{
std::lock_guard<std::mutex> Lock(Mu);
- T.QueuePri = std::max(T.QueuePri, Boosts.lookup(T.Tag));
+ if (!adjust(T))
+ return;
Queue.push_back(std::move(T));
std::push_heap(Queue.begin(), Queue.end());
++Stat.Enqueued;
@@ -87,11 +101,13 @@ void BackgroundQueue::push(Task T) {
void BackgroundQueue::append(std::vector<Task> Tasks) {
{
std::lock_guard<std::mutex> Lock(Mu);
- for (Task &T : Tasks)
- T.QueuePri = std::max(T.QueuePri, Boosts.lookup(T.Tag));
- std::move(Tasks.begin(), Tasks.end(), std::back_inserter(Queue));
+ for (Task &T : Tasks) {
+ if (!adjust(T))
+ continue;
+ Queue.push_back(std::move(T));
+ ++Stat.Enqueued;
+ }
std::make_heap(Queue.begin(), Queue.end());
- Stat.Enqueued += Tasks.size();
notifyProgress();
}
CV.notify_all();
diff --git a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
index f4a9b2fa2d13..bcbedd2500ce 100644
--- a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
+++ b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
@@ -9,6 +9,7 @@
#include "index/BackgroundRebuild.h"
#include "clang/Tooling/ArgumentsAdjusters.h"
#include "clang/Tooling/CompilationDatabase.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/Threading.h"
#include "gmock/gmock.h"
@@ -666,25 +667,45 @@ TEST_F(BackgroundIndexTest, CmdLineHash) {
// Make sure we only store the Cmd for main file.
EXPECT_FALSE(MSS.loadShard(testPath("A.h"))->Cmd);
- {
- tooling::CompileCommand CmdStored = *MSS.loadShard(testPath("A.cc"))->Cmd;
- EXPECT_EQ(CmdStored.CommandLine, Cmd.CommandLine);
- EXPECT_EQ(CmdStored.Directory, Cmd.Directory);
- }
+ tooling::CompileCommand CmdStored = *MSS.loadShard(testPath("A.cc"))->Cmd;
+ EXPECT_EQ(CmdStored.CommandLine, Cmd.CommandLine);
+ EXPECT_EQ(CmdStored.Directory, Cmd.Directory);
+}
- // FIXME: Changing compile commands should be enough to invalidate the cache.
- FS.Files[testPath("A.cc")] = " ";
- Cmd.CommandLine = {"clang++", "../A.cc", "-Dfoo", "-fsyntax-only"};
- CDB.setCompileCommand(testPath("build/../A.cc"), Cmd);
+TEST_F(BackgroundIndexTest, Reindex) {
+ MockFS FS;
+ llvm::StringMap<std::string> Storage;
+ size_t CacheHits = 0;
+ MemoryShardStorage MSS(Storage, CacheHits);
+ OverlayCDB CDB(/*Base=*/nullptr);
+ BackgroundIndex Idx(FS, CDB, [&](llvm::StringRef) { return &MSS; },
+ /*Opts=*/{});
+
+ // Index a file.
+ FS.Files[testPath("A.cc")] = "int theOldFunction();";
+ tooling::CompileCommand Cmd;
+ Cmd.Filename = "../A.cc";
+ Cmd.Directory = testPath("build");
+ Cmd.CommandLine = {"clang++", "../A.cc", "-fsyntax-only"};
+ CDB.setCompileCommand(testPath("A.cc"), Cmd);
ASSERT_TRUE(Idx.blockUntilIdleForTest());
- EXPECT_FALSE(MSS.loadShard(testPath("A.h"))->Cmd);
+ // Verify the result is indexed and stored.
+ EXPECT_EQ(1u, runFuzzyFind(Idx, "theOldFunction").size());
+ EXPECT_EQ(0u, runFuzzyFind(Idx, "theNewFunction").size());
+ std::string OldShard = Storage.lookup(testPath("A.cc"));
+ EXPECT_NE("", OldShard);
- {
- tooling::CompileCommand CmdStored = *MSS.loadShard(testPath("A.cc"))->Cmd;
- EXPECT_EQ(CmdStored.CommandLine, Cmd.CommandLine);
- EXPECT_EQ(CmdStored.Directory, Cmd.Directory);
- }
+ // Change the content and command, and notify to reindex it.
+ Cmd.CommandLine.push_back("-DFOO");
+ FS.Files[testPath("A.cc")] = "int theNewFunction();";
+ CDB.setCompileCommand(testPath("A.cc"), Cmd);
+ ASSERT_TRUE(Idx.blockUntilIdleForTest());
+
+ // Currently, we will never index the same main file again.
+ EXPECT_EQ(1u, runFuzzyFind(Idx, "theOldFunction").size());
+ EXPECT_EQ(0u, runFuzzyFind(Idx, "theNewFunction").size());
+ EXPECT_EQ(OldShard, Storage.lookup(testPath("A.cc")));
}
class BackgroundIndexRebuilderTest : public testing::Test {
@@ -829,6 +850,31 @@ TEST(BackgroundQueueTest, Boost) {
}
}
+TEST(BackgroundQueueTest, Duplicates) {
+ std::string Sequence;
+ BackgroundQueue::Task A([&] { Sequence.push_back('A'); });
+ A.QueuePri = 100;
+ A.Key = 1;
+ BackgroundQueue::Task B([&] { Sequence.push_back('B'); });
+ // B has no key, and is not subject to duplicate detection.
+ B.QueuePri = 50;
+
+ BackgroundQueue Q;
+ Q.append({A, B, A, B}); // One A is dropped, the other is high priority.
+ Q.work(/*OnIdle=*/[&] {
+ // The first time we go idle, we enqueue the same task again.
+ if (!llvm::is_contained(Sequence, ' ')) {
+ Sequence.push_back(' ');
+ Q.append({A, B, A, B}); // Both As are dropped.
+ } else {
+ Q.stop();
+ }
+ });
+
+ // This could reasonably be "ABB BBA", if we had good *re*indexing support.
+ EXPECT_EQ("ABB BB", Sequence);
+}
+
TEST(BackgroundQueueTest, Progress) {
using testing::AnyOf;
BackgroundQueue::Stats S;