diff options
Diffstat (limited to 'deps/v8/src/wasm/module-compiler.cc')
-rw-r--r-- | deps/v8/src/wasm/module-compiler.cc | 1557 |
1 files changed, 681 insertions, 876 deletions
diff --git a/deps/v8/src/wasm/module-compiler.cc b/deps/v8/src/wasm/module-compiler.cc index ab8b49027e..96abaa0a04 100644 --- a/deps/v8/src/wasm/module-compiler.cc +++ b/deps/v8/src/wasm/module-compiler.cc @@ -5,6 +5,7 @@ #include "src/wasm/module-compiler.h" #include <algorithm> +#include <memory> #include <queue> #include "src/api/api-inl.h" @@ -96,10 +97,7 @@ class V8_NODISCARD BackgroundCompileScope { std::shared_ptr<NativeModule> native_module_; }; -enum CompileBaselineOnly : bool { - kBaselineOnly = true, - kBaselineOrTopTier = false -}; +enum CompilationTier { kBaseline = 0, kTopTier = 1, kNumTiers = kTopTier + 1 }; // A set of work-stealing queues (vectors of units). Each background compile // task owns one of the queues and steals from all others once its own queue @@ -173,19 +171,15 @@ class CompilationUnitQueues { return queues_[task_id].get(); } - base::Optional<WasmCompilationUnit> GetNextUnit( - Queue* queue, CompileBaselineOnly baseline_only) { - // As long as any lower-tier units are outstanding we need to steal them - // before executing own higher-tier units. - int max_tier = baseline_only ? 
kBaseline : kTopTier; - for (int tier = GetLowestTierWithUnits(); tier <= max_tier; ++tier) { - if (auto unit = GetNextUnitOfTier(queue, tier)) { - size_t old_units_count = - num_units_[tier].fetch_sub(1, std::memory_order_relaxed); - DCHECK_LE(1, old_units_count); - USE(old_units_count); - return unit; - } + base::Optional<WasmCompilationUnit> GetNextUnit(Queue* queue, + CompilationTier tier) { + DCHECK_LT(tier, CompilationTier::kNumTiers); + if (auto unit = GetNextUnitOfTier(queue, tier)) { + size_t old_units_count = + num_units_[tier].fetch_sub(1, std::memory_order_relaxed); + DCHECK_LE(1, old_units_count); + USE(old_units_count); + return unit; } return {}; } @@ -210,8 +204,9 @@ class CompilationUnitQueues { base::MutexGuard guard(&queue->mutex); base::Optional<base::MutexGuard> big_units_guard; - for (auto pair : {std::make_pair(int{kBaseline}, baseline_units), - std::make_pair(int{kTopTier}, top_tier_units)}) { + for (auto pair : + {std::make_pair(CompilationTier::kBaseline, baseline_units), + std::make_pair(CompilationTier::kTopTier, top_tier_units)}) { int tier = pair.first; base::Vector<WasmCompilationUnit> units = pair.second; if (units.empty()) continue; @@ -256,26 +251,29 @@ class CompilationUnitQueues { queue->top_tier_priority_units.emplace(priority, unit); } num_priority_units_.fetch_add(1, std::memory_order_relaxed); - num_units_[kTopTier].fetch_add(1, std::memory_order_relaxed); + num_units_[CompilationTier::kTopTier].fetch_add(1, + std::memory_order_relaxed); } - // Get the current total number of units in all queues. This is only a + // Get the current number of units in the queue for |tier|. This is only a // momentary snapshot, it's not guaranteed that {GetNextUnit} returns a unit // if this method returns non-zero. 
- size_t GetTotalSize() const { - size_t total = 0; - for (auto& atomic_counter : num_units_) { - total += atomic_counter.load(std::memory_order_relaxed); + size_t GetSizeForTier(CompilationTier tier) const { + DCHECK_LT(tier, CompilationTier::kNumTiers); + return num_units_[tier].load(std::memory_order_relaxed); + } + + void AllowAnotherTopTierJob(uint32_t func_index) { + top_tier_compiled_[func_index].store(false, std::memory_order_relaxed); + } + + void AllowAnotherTopTierJobForAllFunctions() { + for (int i = 0; i < num_declared_functions_; i++) { + AllowAnotherTopTierJob(i); } - return total; } private: - // Store tier in int so we can easily loop over it: - static constexpr int kBaseline = 0; - static constexpr int kTopTier = 1; - static constexpr int kNumTiers = kTopTier + 1; - // Functions bigger than {kBigUnitsLimit} will be compiled first, in ascending // order of their function body size. static constexpr size_t kBigUnitsLimit = 4096; @@ -315,10 +313,10 @@ class CompilationUnitQueues { base::Mutex mutex; // Can be read concurrently to check whether any elements are in the queue. - std::atomic<bool> has_units[kNumTiers]; + std::atomic<bool> has_units[CompilationTier::kNumTiers]; // Protected by {mutex}: - std::priority_queue<BigUnit> units[kNumTiers]; + std::priority_queue<BigUnit> units[CompilationTier::kNumTiers]; }; struct QueueImpl : public Queue { @@ -334,7 +332,7 @@ class CompilationUnitQueues { base::Mutex mutex; // All fields below are protected by {mutex}. - std::vector<WasmCompilationUnit> units[kNumTiers]; + std::vector<WasmCompilationUnit> units[CompilationTier::kNumTiers]; std::priority_queue<TopTierPriorityUnit> top_tier_priority_units; int next_steal_task_id; }; @@ -344,19 +342,12 @@ class CompilationUnitQueues { return next == static_cast<int>(num_queues) ? 
0 : next; } - int GetLowestTierWithUnits() const { - for (int tier = 0; tier < kNumTiers; ++tier) { - if (num_units_[tier].load(std::memory_order_relaxed) > 0) return tier; - } - return kNumTiers; - } - base::Optional<WasmCompilationUnit> GetNextUnitOfTier(Queue* public_queue, int tier) { QueueImpl* queue = static_cast<QueueImpl*>(public_queue); // First check whether there is a priority unit. Execute that first. - if (tier == kTopTier) { + if (tier == CompilationTier::kTopTier) { if (auto unit = GetTopTierPriorityUnit(queue)) { return unit; } @@ -430,7 +421,8 @@ class CompilationUnitQueues { true, std::memory_order_relaxed)) { return unit; } - num_units_[kTopTier].fetch_sub(1, std::memory_order_relaxed); + num_units_[CompilationTier::kTopTier].fetch_sub( + 1, std::memory_order_relaxed); } steal_task_id = queue->next_steal_task_id; } @@ -504,7 +496,8 @@ class CompilationUnitQueues { returned_unit = unit; break; } - num_units_[kTopTier].fetch_sub(1, std::memory_order_relaxed); + num_units_[CompilationTier::kTopTier].fetch_sub( + 1, std::memory_order_relaxed); } } base::MutexGuard guard(&queue->mutex); @@ -520,7 +513,7 @@ class CompilationUnitQueues { BigUnitsQueue big_units_queue_; - std::atomic<size_t> num_units_[kNumTiers]; + std::atomic<size_t> num_units_[CompilationTier::kNumTiers]; std::atomic<size_t> num_priority_units_{0}; std::unique_ptr<std::atomic<bool>[]> top_tier_compiled_; std::atomic<int> next_queue_to_add{0}; @@ -544,7 +537,12 @@ class CompilationStateImpl { std::shared_ptr<Counters> async_counters, DynamicTiering dynamic_tiering); ~CompilationStateImpl() { - if (compile_job_->IsValid()) compile_job_->CancelAndDetach(); + if (js_to_wasm_wrapper_job_->IsValid()) + js_to_wasm_wrapper_job_->CancelAndDetach(); + if (baseline_compile_job_->IsValid()) + baseline_compile_job_->CancelAndDetach(); + if (top_tier_compile_job_->IsValid()) + top_tier_compile_job_->CancelAndDetach(); } // Call right after the constructor, after the {compilation_state_} field in @@ 
-575,8 +573,6 @@ class CompilationStateImpl { int num_export_wrappers, ProfileInformation* pgo_info); - // Initialize the compilation progress after deserialization. This is needed - // for recompilation (e.g. for tier down) to work later. void InitializeCompilationProgressAfterDeserialization( base::Vector<const int> lazy_functions, base::Vector<const int> eager_functions); @@ -591,14 +587,6 @@ class CompilationStateImpl { // equivalent to {InitializeCompilationUnits}. void AddCompilationUnit(CompilationUnitBuilder* builder, int func_index); - // Initialize recompilation of the whole module: Setup compilation progress - // for recompilation and add the respective compilation units. The callback is - // called immediately if no recompilation is needed, or called later - // otherwise. - void InitializeRecompilation(TieringState new_tiering_state, - std::unique_ptr<CompilationEventCallback> - recompilation_finished_callback); - // Add the callback to be called on compilation events. Needs to be // set before {CommitCompilationUnits} is run to ensure that it receives all // events. The callback object must support being deleted from any thread. 
@@ -616,7 +604,7 @@ class CompilationStateImpl { CompilationUnitQueues::Queue* GetQueueForCompileTask(int task_id); base::Optional<WasmCompilationUnit> GetNextCompilationUnit( - CompilationUnitQueues::Queue*, CompileBaselineOnly); + CompilationUnitQueues::Queue*, CompilationTier tier); std::shared_ptr<JSToWasmWrapperCompilationUnit> GetNextJSToWasmWrapperCompilationUnit(); @@ -630,15 +618,21 @@ class CompilationStateImpl { void SchedulePublishCompilationResults( std::vector<std::unique_ptr<WasmCode>> unpublished_code); - size_t NumOutstandingCompilations() const; + size_t NumOutstandingExportWrappers() const; + size_t NumOutstandingCompilations(CompilationTier tier) const; void SetError(); void WaitForCompilationEvent(CompilationEvent event); - void SetHighPriority() { - // TODO(wasm): Keep a lower priority for TurboFan-only jobs. - compile_job_->UpdatePriority(TaskPriority::kUserBlocking); + void TierUpAllFunctions(); + + void AllowAnotherTopTierJob(uint32_t func_index) { + compilation_unit_queues_.AllowAnotherTopTierJob(func_index); + } + + void AllowAnotherTopTierJobForAllFunctions() { + compilation_unit_queues_.AllowAnotherTopTierJobForAllFunctions(); } bool failed() const { @@ -651,11 +645,6 @@ class CompilationStateImpl { outstanding_export_wrappers_ == 0; } - bool recompilation_finished() const { - base::MutexGuard guard(&callbacks_mutex_); - return outstanding_recompilation_functions_ == 0; - } - DynamicTiering dynamic_tiering() const { return dynamic_tiering_; } Counters* counters() const { return async_counters_.get(); } @@ -682,15 +671,14 @@ class CompilationStateImpl { } private: - // Returns the potentially-updated {function_progress}. 
- uint8_t AddCompilationUnitInternal(CompilationUnitBuilder* builder, - int function_index, - uint8_t function_progress); + void AddCompilationUnitInternal(CompilationUnitBuilder* builder, + int function_index, + uint8_t function_progress); // Trigger callbacks according to the internal counters below - // (outstanding_...), plus the given events. + // (outstanding_...). // Hold the {callbacks_mutex_} when calling this method. - void TriggerCallbacks(base::EnumSet<CompilationEvent> additional_events = {}); + void TriggerCallbacks(); void PublishCompilationResults( std::vector<std::unique_ptr<WasmCode>> unpublished_code); @@ -726,9 +714,11 @@ class CompilationStateImpl { // being accessed concurrently. mutable base::Mutex mutex_; - // The compile job handle, initialized right after construction of + // The compile job handles, initialized right after construction of // {CompilationStateImpl}. - std::unique_ptr<JobHandle> compile_job_; + std::unique_ptr<JobHandle> js_to_wasm_wrapper_job_; + std::unique_ptr<JobHandle> baseline_compile_job_; + std::unique_ptr<JobHandle> top_tier_compile_job_; // The compilation id to identify trace events linked to this compilation. static constexpr int kInvalidCompilationID = -1; @@ -770,9 +760,6 @@ class CompilationStateImpl { size_t bytes_since_last_chunk_ = 0; std::vector<uint8_t> compilation_progress_; - int outstanding_recompilation_functions_ = 0; - TieringState tiering_state_ = kTieredUp; - // End of fields protected by {callbacks_mutex_}. 
////////////////////////////////////////////////////////////////////////////// @@ -785,7 +772,6 @@ class CompilationStateImpl { using RequiredBaselineTierField = base::BitField8<ExecutionTier, 0, 2>; using RequiredTopTierField = base::BitField8<ExecutionTier, 2, 2>; using ReachedTierField = base::BitField8<ExecutionTier, 4, 2>; - using MissingRecompilationField = base::BitField8<bool, 6, 1>; }; CompilationStateImpl* Impl(CompilationState* compilation_state) { @@ -805,7 +791,7 @@ bool BackgroundCompileScope::cancelled() const { Impl(native_module_->compilation_state())->cancelled(); } -void UpdateFeatureUseCounts(Isolate* isolate, const WasmFeatures& detected) { +void UpdateFeatureUseCounts(Isolate* isolate, WasmFeatures detected) { using Feature = v8::Isolate::UseCounterFeature; constexpr static std::pair<WasmFeature, Feature> kUseCounters[] = { {kFeature_reftypes, Feature::kWasmRefTypes}, @@ -853,7 +839,17 @@ void CompilationState::AddCallback( return Impl(this)->AddCallback(std::move(callback)); } -void CompilationState::SetHighPriority() { Impl(this)->SetHighPriority(); } +void CompilationState::TierUpAllFunctions() { + Impl(this)->TierUpAllFunctions(); +} + +void CompilationState::AllowAnotherTopTierJob(uint32_t func_index) { + Impl(this)->AllowAnotherTopTierJob(func_index); +} + +void CompilationState::AllowAnotherTopTierJobForAllFunctions() { + Impl(this)->AllowAnotherTopTierJobForAllFunctions(); +} void CompilationState::InitializeAfterDeserialization( base::Vector<const int> lazy_functions, @@ -868,10 +864,6 @@ bool CompilationState::baseline_compilation_finished() const { return Impl(this)->baseline_compilation_finished(); } -bool CompilationState::recompilation_finished() const { - return Impl(this)->recompilation_finished(); -} - void CompilationState::set_compilation_id(int compilation_id) { Impl(this)->set_compilation_id(compilation_id); } @@ -920,7 +912,7 @@ const WasmCompilationHint* GetCompilationHint(const WasmModule* module, } CompileStrategy 
GetCompileStrategy(const WasmModule* module, - const WasmFeatures& enabled_features, + WasmFeatures enabled_features, uint32_t func_index, bool lazy_module) { if (lazy_module) return CompileStrategy::kLazy; if (!enabled_features.has_compilation_hints()) { @@ -945,20 +937,24 @@ struct ExecutionTierPair { ExecutionTier top_tier; }; +// Pass the debug state as a separate parameter to avoid data races: the debug +// state may change between its use here and its use at the call site. To have +// a consistent view on the debug state, the caller reads the debug state once +// and then passes it to this function. ExecutionTierPair GetDefaultTiersPerModule(NativeModule* native_module, DynamicTiering dynamic_tiering, + DebugState is_in_debug_state, bool lazy_module) { const WasmModule* module = native_module->module(); if (is_asmjs_module(module)) { return {ExecutionTier::kTurbofan, ExecutionTier::kTurbofan}; } - // TODO(13224): Use lazy compilation for debug code. - if (native_module->IsTieredDown()) { - return {ExecutionTier::kLiftoff, ExecutionTier::kLiftoff}; - } if (lazy_module) { return {ExecutionTier::kNone, ExecutionTier::kNone}; } + if (is_in_debug_state) { + return {ExecutionTier::kLiftoff, ExecutionTier::kLiftoff}; + } ExecutionTier baseline_tier = v8_flags.liftoff ? ExecutionTier::kLiftoff : ExecutionTier::kTurbofan; bool eager_tier_up = !dynamic_tiering && v8_flags.wasm_tier_up; @@ -968,14 +964,17 @@ ExecutionTierPair GetDefaultTiersPerModule(NativeModule* native_module, } ExecutionTierPair GetLazyCompilationTiers(NativeModule* native_module, - uint32_t func_index) { + uint32_t func_index, + DebugState is_in_debug_state) { DynamicTiering dynamic_tiering = Impl(native_module->compilation_state())->dynamic_tiering(); // For lazy compilation, get the tiers we would use if lazy compilation is // disabled. 
constexpr bool kNotLazy = false; - ExecutionTierPair tiers = - GetDefaultTiersPerModule(native_module, dynamic_tiering, kNotLazy); + ExecutionTierPair tiers = GetDefaultTiersPerModule( + native_module, dynamic_tiering, is_in_debug_state, kNotLazy); + // If we are in debug mode, we ignore compilation hints. + if (is_in_debug_state) return tiers; // Check if compilation hints override default tiering behaviour. if (native_module->enabled_features().has_compilation_hints()) { @@ -1012,7 +1011,7 @@ class CompilationUnitBuilder { void AddImportUnit(uint32_t func_index) { DCHECK_GT(native_module_->module()->num_imported_functions, func_index); baseline_units_.emplace_back(func_index, ExecutionTier::kNone, - kNoDebugging); + kNotForDebugging); } void AddJSToWasmWrapperUnit( @@ -1021,35 +1020,22 @@ class CompilationUnitBuilder { } void AddBaselineUnit(int func_index, ExecutionTier tier) { - baseline_units_.emplace_back(func_index, tier, kNoDebugging); + baseline_units_.emplace_back(func_index, tier, kNotForDebugging); } void AddTopTierUnit(int func_index, ExecutionTier tier) { - tiering_units_.emplace_back(func_index, tier, kNoDebugging); + tiering_units_.emplace_back(func_index, tier, kNotForDebugging); } - void AddDebugUnit(int func_index) { - baseline_units_.emplace_back(func_index, ExecutionTier::kLiftoff, - kForDebugging); - } - - void AddRecompilationUnit(int func_index, ExecutionTier tier) { - // For recompilation, just treat all units like baseline units. - baseline_units_.emplace_back( - func_index, tier, - tier == ExecutionTier::kLiftoff ? 
kForDebugging : kNoDebugging); - } - - bool Commit() { + void Commit() { if (baseline_units_.empty() && tiering_units_.empty() && js_to_wasm_wrapper_units_.empty()) { - return false; + return; } compilation_state()->CommitCompilationUnits( base::VectorOf(baseline_units_), base::VectorOf(tiering_units_), base::VectorOf(js_to_wasm_wrapper_units_)); Clear(); - return true; } void Clear() { @@ -1072,38 +1058,20 @@ class CompilationUnitBuilder { js_to_wasm_wrapper_units_; }; -WasmError GetWasmErrorWithName(ModuleWireBytes wire_bytes, - const WasmFunction* func, - const WasmModule* module, WasmError error) { - WasmName name = wire_bytes.GetNameOrNull(func, module); - if (name.begin() == nullptr) { - return WasmError(error.offset(), "Compiling function #%d failed: %s", - func->func_index, error.message().c_str()); - } else { - TruncatedUserString<> truncated_name(name); - return WasmError(error.offset(), - "Compiling function #%d:\"%.*s\" failed: %s", - func->func_index, truncated_name.length(), - truncated_name.start(), error.message().c_str()); - } -} - -void SetCompileError(ErrorThrower* thrower, ModuleWireBytes wire_bytes, - const WasmFunction* func, const WasmModule* module, - WasmError error) { - thrower->CompileFailed(GetWasmErrorWithName(std::move(wire_bytes), func, - module, std::move(error))); -} - DecodeResult ValidateSingleFunction(const WasmModule* module, int func_index, base::Vector<const uint8_t> code, - AccountingAllocator* allocator, WasmFeatures enabled_features) { + // Sometimes functions get validated unpredictably in the background, for + // debugging or when inlining one function into another. We check here if that + // is the case, and exit early if so. 
+ if (module->function_was_validated(func_index)) return {}; const WasmFunction* func = &module->functions[func_index]; FunctionBody body{func->sig, func->code.offset(), code.begin(), code.end()}; WasmFeatures detected_features; - return ValidateFunctionBody(allocator, enabled_features, module, - &detected_features, body); + DecodeResult result = + ValidateFunctionBody(enabled_features, module, &detected_features, body); + if (result.ok()) module->set_function_validated(func_index); + return result; } enum OnlyLazyFunctions : bool { @@ -1111,37 +1079,6 @@ enum OnlyLazyFunctions : bool { kOnlyLazyFunctions = true, }; -void ValidateSequentially( - const WasmModule* module, NativeModule* native_module, Counters* counters, - AccountingAllocator* allocator, ErrorThrower* thrower, - OnlyLazyFunctions only_lazy_functions = kAllFunctions) { - DCHECK(!thrower->error()); - uint32_t start = module->num_imported_functions; - uint32_t end = start + module->num_declared_functions; - auto enabled_features = native_module->enabled_features(); - bool lazy_module = v8_flags.wasm_lazy_compilation; - for (uint32_t func_index = start; func_index < end; func_index++) { - // Skip non-lazy functions if requested. 
- if (only_lazy_functions) { - CompileStrategy strategy = - GetCompileStrategy(module, enabled_features, func_index, lazy_module); - if (strategy != CompileStrategy::kLazy && - strategy != CompileStrategy::kLazyBaselineEagerTopTier) { - continue; - } - } - - ModuleWireBytes wire_bytes{native_module->wire_bytes()}; - const WasmFunction* func = &module->functions[func_index]; - base::Vector<const uint8_t> code = wire_bytes.GetFunctionBytes(func); - DecodeResult result = ValidateSingleFunction(module, func_index, code, - allocator, enabled_features); - if (result.failed()) { - SetCompileError(thrower, wire_bytes, func, module, result.error()); - } - } -} - bool IsLazyModule(const WasmModule* module) { return v8_flags.wasm_lazy_compilation || (v8_flags.asm_wasm_lazy_compilation && is_asmjs_module(module)); @@ -1187,18 +1124,17 @@ bool CompileLazy(Isolate* isolate, WasmInstanceObject instance, TRACE_LAZY("Compiling wasm-function#%d.\n", func_index); - base::ThreadTicks thread_ticks = base::ThreadTicks::IsSupported() - ? base::ThreadTicks::Now() - : base::ThreadTicks(); - CompilationStateImpl* compilation_state = Impl(native_module->compilation_state()); - ExecutionTierPair tiers = GetLazyCompilationTiers(native_module, func_index); + DebugState is_in_debug_state = native_module->IsInDebugState(); + ExecutionTierPair tiers = + GetLazyCompilationTiers(native_module, func_index, is_in_debug_state); DCHECK_LE(native_module->num_imported_functions(), func_index); DCHECK_LT(func_index, native_module->num_functions()); - WasmCompilationUnit baseline_unit{func_index, tiers.baseline_tier, - kNoDebugging}; + WasmCompilationUnit baseline_unit{ + func_index, tiers.baseline_tier, + is_in_debug_state ? kForDebugging : kNotForDebugging}; CompilationEnv env = native_module->CreateCompilationEnv(); // TODO(wasm): Use an assembler buffer cache for lazy compilation. 
AssemblerBufferCache* assembler_buffer_cache = nullptr; @@ -1207,11 +1143,6 @@ bool CompileLazy(Isolate* isolate, WasmInstanceObject instance, &env, compilation_state->GetWireBytesStorage().get(), counters, assembler_buffer_cache, &detected_features); compilation_state->OnCompilationStopped(detected_features); - if (!thread_ticks.IsNull()) { - native_module->UpdateCPUDuration( - (base::ThreadTicks::Now() - thread_ticks).InMicroseconds(), - tiers.baseline_tier); - } // During lazy compilation, we can only get compilation errors when // {--wasm-lazy-validation} is enabled. Otherwise, the module was fully @@ -1245,7 +1176,8 @@ bool CompileLazy(Isolate* isolate, WasmInstanceObject instance, if (GetCompileStrategy(module, native_module->enabled_features(), func_index, lazy_module) == CompileStrategy::kLazy && tiers.baseline_tier < tiers.top_tier) { - WasmCompilationUnit tiering_unit{func_index, tiers.top_tier, kNoDebugging}; + WasmCompilationUnit tiering_unit{func_index, tiers.top_tier, + kNotForDebugging}; compilation_state->CommitTopTierCompilationUnit(tiering_unit); } return true; @@ -1262,15 +1194,15 @@ void ThrowLazyCompilationError(Isolate* isolate, base::Vector<const uint8_t> code = compilation_state->GetWireBytesStorage()->GetCode(func->code); - WasmEngine* engine = GetWasmEngine(); auto enabled_features = native_module->enabled_features(); - DecodeResult decode_result = ValidateSingleFunction( - module, func_index, code, engine->allocator(), enabled_features); + DecodeResult decode_result = + ValidateSingleFunction(module, func_index, code, enabled_features); CHECK(decode_result.failed()); wasm::ErrorThrower thrower(isolate, nullptr); - SetCompileError(&thrower, ModuleWireBytes(native_module->wire_bytes()), func, - module, decode_result.error()); + thrower.CompileFailed(GetWasmErrorWithName(native_module->wire_bytes(), + func_index, module, + std::move(decode_result).error())); } class TransitiveTypeFeedbackProcessor { @@ -1321,7 +1253,9 @@ class 
TransitiveTypeFeedbackProcessor { DisallowGarbageCollection no_gc_scope_; WasmInstanceObject instance_; const WasmModule* const module_; - base::MutexGuard mutex_guard; + // TODO(jkummerow): Check if it makes a difference to apply any updates + // as a single batch at the end. + base::SharedMutexGuard<base::kExclusive> mutex_guard; std::unordered_map<uint32_t, FunctionTypeFeedback>& feedback_for_function_; std::set<int> queue_; }; @@ -1455,12 +1389,13 @@ void TriggerTierUp(WasmInstanceObject instance, int func_index) { CompilationStateImpl* compilation_state = Impl(native_module->compilation_state()); WasmCompilationUnit tiering_unit{func_index, ExecutionTier::kTurbofan, - kNoDebugging}; + kNotForDebugging}; const WasmModule* module = native_module->module(); int priority; { - base::MutexGuard mutex_guard(&module->type_feedback.mutex); + base::SharedMutexGuard<base::kExclusive> mutex_guard( + &module->type_feedback.mutex); int array_index = wasm::declared_function_index(instance.module(), func_index); instance.tiering_budget_array()[array_index] = v8_flags.wasm_tiering_budget; @@ -1477,7 +1412,7 @@ void TriggerTierUp(WasmInstanceObject instance, int func_index) { // Before adding the tier-up unit or increasing priority, do process type // feedback for best code generation. - if (v8_flags.wasm_speculative_inlining) { + if (native_module->enabled_features().has_inlining()) { // TODO(jkummerow): we could have collisions here if different instances // of the same module have collected different feedback. If that ever // becomes a problem, figure out a solution. 
@@ -1489,21 +1424,21 @@ void TriggerTierUp(WasmInstanceObject instance, int func_index) { void TierUpNowForTesting(Isolate* isolate, WasmInstanceObject instance, int func_index) { - if (v8_flags.wasm_speculative_inlining) { + NativeModule* native_module = instance.module_object().native_module(); + if (native_module->enabled_features().has_inlining()) { TransitiveTypeFeedbackProcessor::Process(instance, func_index); } - auto* native_module = instance.module_object().native_module(); - wasm::GetWasmEngine()->CompileFunction(isolate, native_module, func_index, + wasm::GetWasmEngine()->CompileFunction(isolate->counters(), native_module, + func_index, wasm::ExecutionTier::kTurbofan); CHECK(!native_module->compilation_state()->failed()); } namespace { -void RecordStats(CodeT codet, Counters* counters) { - if (codet.is_off_heap_trampoline()) return; - Code code = FromCodeT(codet); - counters->wasm_generated_code_size()->Increment(code.raw_body_size()); +void RecordStats(Code code, Counters* counters) { + if (!code.has_instruction_stream()) return; + counters->wasm_generated_code_size()->Increment(code.body_size()); counters->wasm_reloc_size()->Increment(code.relocation_info().length()); } @@ -1570,17 +1505,8 @@ constexpr uint8_t kMainTaskId = 0; // Run by the {BackgroundCompileJob} (on any thread). CompilationExecutionResult ExecuteCompilationUnits( std::weak_ptr<NativeModule> native_module, Counters* counters, - JobDelegate* delegate, CompileBaselineOnly baseline_only) { + JobDelegate* delegate, CompilationTier tier) { TRACE_EVENT0("v8.wasm", "wasm.ExecuteCompilationUnits"); - - // Execute JS to Wasm wrapper units first, so that they are ready to be - // finalized by the main thread when the kFinishedBaselineCompilation event is - // triggered. - if (ExecuteJSToWasmWrapperCompilationUnits(native_module, delegate) == - kYield) { - return kYield; - } - // These fields are initialized in a {BackgroundCompileScope} before // starting compilation. 
base::Optional<CompilationEnv> env; @@ -1596,10 +1522,6 @@ CompilationExecutionResult ExecuteCompilationUnits( WasmFeatures detected_features = WasmFeatures::None(); - base::ThreadTicks thread_ticks = base::ThreadTicks::IsSupported() - ? base::ThreadTicks::Now() - : base::ThreadTicks(); - // Preparation (synchronized): Initialize the fields above and get the first // compilation unit. { @@ -1609,8 +1531,8 @@ CompilationExecutionResult ExecuteCompilationUnits( wire_bytes = compile_scope.compilation_state()->GetWireBytesStorage(); module = compile_scope.native_module()->shared_module(); queue = compile_scope.compilation_state()->GetQueueForCompileTask(task_id); - unit = compile_scope.compilation_state()->GetNextCompilationUnit( - queue, baseline_only); + unit = + compile_scope.compilation_state()->GetNextCompilationUnit(queue, tier); if (!unit) return kNoMoreUnits; } TRACE_COMPILE("ExecuteCompilationUnits (task id %d)\n", task_id); @@ -1659,12 +1581,7 @@ CompilationExecutionResult ExecuteCompilationUnits( // Yield or get next unit. 
if (yield || !(unit = compile_scope.compilation_state()->GetNextCompilationUnit( - queue, baseline_only))) { - if (!thread_ticks.IsNull()) { - compile_scope.native_module()->UpdateCPUDuration( - (base::ThreadTicks::Now() - thread_ticks).InMicroseconds(), - current_tier); - } + queue, tier))) { std::vector<std::unique_ptr<WasmCode>> unpublished_code = compile_scope.native_module()->AddCompiledCode( base::VectorOf(std::move(results_to_publish))); @@ -1686,12 +1603,6 @@ CompilationExecutionResult ExecuteCompilationUnits( bool liftoff_finished = unit->tier() != current_tier && unit->tier() == ExecutionTier::kTurbofan; if (batch_full || liftoff_finished) { - if (!thread_ticks.IsNull()) { - base::ThreadTicks thread_ticks_now = base::ThreadTicks::Now(); - compile_scope.native_module()->UpdateCPUDuration( - (thread_ticks_now - thread_ticks).InMicroseconds(), current_tier); - thread_ticks = thread_ticks_now; - } std::vector<std::unique_ptr<WasmCode>> unpublished_code = compile_scope.native_module()->AddCompiledCode( base::VectorOf(std::move(results_to_publish))); @@ -1717,6 +1628,19 @@ int AddExportWrapperUnits(Isolate* isolate, NativeModule* native_module, uint32_t canonical_type_index = native_module->module() ->isorecursive_canonical_type_ids[function.sig_index]; + int wrapper_index = + GetExportWrapperIndex(canonical_type_index, function.imported); + if (wrapper_index < isolate->heap()->js_to_wasm_wrappers().length()) { + MaybeObject existing_wrapper = + isolate->heap()->js_to_wasm_wrappers().Get(wrapper_index); + if (existing_wrapper.IsStrongOrWeak() && + !existing_wrapper.GetHeapObject().IsUndefined()) { + // Skip wrapper compilation as the wrapper is already cached. + // Note that this does not guarantee that the wrapper is still cached + // at the moment at which the WasmInternalFunction is instantiated. 
+ continue; + } + } JSToWasmWrapperKey key(function.imported, canonical_type_index); if (keys.insert(key).second) { auto unit = std::make_shared<JSToWasmWrapperCompilationUnit>( @@ -1740,15 +1664,12 @@ int AddImportWrapperUnits(NativeModule* native_module, for (int func_index = 0; func_index < num_imported_functions; func_index++) { const WasmFunction& function = native_module->module()->functions[func_index]; - if (!IsJSCompatibleSignature(function.sig, native_module->module(), - native_module->enabled_features())) { - continue; - } + if (!IsJSCompatibleSignature(function.sig)) continue; uint32_t canonical_type_index = native_module->module() ->isorecursive_canonical_type_ids[function.sig_index]; WasmImportWrapperCache::CacheKey key( - compiler::kDefaultImportCallKind, canonical_type_index, + kDefaultImportCallKind, canonical_type_index, static_cast<int>(function.sig->parameter_count()), kNoSuspend); auto it = keys.insert(key); if (it.second) { @@ -1761,32 +1682,9 @@ int AddImportWrapperUnits(NativeModule* native_module, return static_cast<int>(keys.size()); } -void InitializeLazyCompilation(NativeModule* native_module) { - const bool lazy_module = IsLazyModule(native_module->module()); - auto* module = native_module->module(); - - uint32_t start = module->num_imported_functions; - uint32_t end = start + module->num_declared_functions; - base::Optional<CodeSpaceWriteScope> lazy_code_space_write_scope; - for (uint32_t func_index = start; func_index < end; func_index++) { - CompileStrategy strategy = GetCompileStrategy( - module, native_module->enabled_features(), func_index, lazy_module); - if (strategy == CompileStrategy::kLazy || - strategy == CompileStrategy::kLazyBaselineEagerTopTier) { - // Open a single scope for all following calls to {UseLazyStub()}, instead - // of flipping page permissions for each {func_index} individually. 
- if (!lazy_code_space_write_scope.has_value()) { - lazy_code_space_write_scope.emplace(native_module); - } - native_module->UseLazyStub(func_index); - } - } -} - std::unique_ptr<CompilationUnitBuilder> InitializeCompilation( Isolate* isolate, NativeModule* native_module, ProfileInformation* pgo_info) { - InitializeLazyCompilation(native_module); CompilationStateImpl* compilation_state = Impl(native_module->compilation_state()); auto builder = std::make_unique<CompilationUnitBuilder>(native_module); @@ -1799,7 +1697,7 @@ std::unique_ptr<CompilationUnitBuilder> InitializeCompilation( } bool MayCompriseLazyFunctions(const WasmModule* module, - const WasmFeatures& enabled_features) { + WasmFeatures enabled_features) { if (IsLazyModule(module)) return true; if (enabled_features.has_compilation_hints()) return true; #ifdef ENABLE_SLOW_DCHECKS @@ -1854,9 +1752,7 @@ class CompilationTimeCallback : public CompilationEventCallback { true, // success native_module->liftoff_code_size(), // code_size_in_bytes native_module->liftoff_bailout_count(), // liftoff_bailout_count - duration.InMicroseconds(), // wall_clock_duration_in_us - static_cast<int64_t>( // cpu_time_duration_in_us - native_module->baseline_compilation_cpu_duration())}; + duration.InMicroseconds()}; // wall_clock_duration_in_us metrics_recorder_->DelayMainThreadEvent(event, context_id_); } if (compilation_event == CompilationEvent::kFailedCompilation) { @@ -1869,9 +1765,7 @@ class CompilationTimeCallback : public CompilationEventCallback { false, // success native_module->liftoff_code_size(), // code_size_in_bytes native_module->liftoff_bailout_count(), // liftoff_bailout_count - duration.InMicroseconds(), // wall_clock_duration_in_us - static_cast<int64_t>( // cpu_time_duration_in_us - native_module->baseline_compilation_cpu_duration())}; + duration.InMicroseconds()}; // wall_clock_duration_in_us metrics_recorder_->DelayMainThreadEvent(event, context_id_); } } @@ -1885,26 +1779,44 @@ class 
CompilationTimeCallback : public CompilationEventCallback { const CompileMode compile_mode_; }; +WasmError ValidateFunctions(const WasmModule* module, + base::Vector<const uint8_t> wire_bytes, + WasmFeatures enabled_features, + OnlyLazyFunctions only_lazy_functions) { + DCHECK_EQ(module->origin, kWasmOrigin); + if (only_lazy_functions && + !MayCompriseLazyFunctions(module, enabled_features)) { + return {}; + } + + std::function<bool(int)> filter; // Initially empty for "all functions". + if (only_lazy_functions) { + const bool is_lazy_module = IsLazyModule(module); + filter = [module, enabled_features, is_lazy_module](int func_index) { + CompileStrategy strategy = GetCompileStrategy(module, enabled_features, + func_index, is_lazy_module); + return strategy == CompileStrategy::kLazy || + strategy == CompileStrategy::kLazyBaselineEagerTopTier; + }; + } + // Call {ValidateFunctions} in the module decoder. + return ValidateFunctions(module, enabled_features, wire_bytes, filter); +} + +WasmError ValidateFunctions(const NativeModule& native_module, + OnlyLazyFunctions only_lazy_functions) { + return ValidateFunctions(native_module.module(), native_module.wire_bytes(), + native_module.enabled_features(), + only_lazy_functions); +} + void CompileNativeModule(Isolate* isolate, v8::metrics::Recorder::ContextId context_id, - ErrorThrower* thrower, const WasmModule* wasm_module, + ErrorThrower* thrower, std::shared_ptr<NativeModule> native_module, ProfileInformation* pgo_info) { CHECK(!v8_flags.jitless); - ModuleWireBytes wire_bytes(native_module->wire_bytes()); - if (!v8_flags.wasm_lazy_validation && wasm_module->origin == kWasmOrigin && - MayCompriseLazyFunctions(wasm_module, - native_module->enabled_features())) { - // Validate wasm modules for lazy compilation if requested. Never validate - // asm.js modules as these are valid by construction (additionally a CHECK - // will catch this during lazy compilation). 
- ValidateSequentially(wasm_module, native_module.get(), isolate->counters(), - isolate->allocator(), thrower, kOnlyLazyFunctions); - // On error: Return and leave the module in an unexecutable state. - if (thrower->error()) return; - } - - DCHECK_GE(kMaxInt, native_module->module()->num_declared_functions); + const WasmModule* module = native_module->module(); // The callback captures a shared ptr to the semaphore. auto* compilation_state = Impl(native_module->compilation_state()); @@ -1919,72 +1831,113 @@ void CompileNativeModule(Isolate* isolate, InitializeCompilation(isolate, native_module.get(), pgo_info); compilation_state->InitializeCompilationUnits(std::move(builder)); + // Validate wasm modules for lazy compilation if requested. Never validate + // asm.js modules as these are valid by construction (additionally a CHECK + // will catch this during lazy compilation). + if (!v8_flags.wasm_lazy_validation && module->origin == kWasmOrigin) { + DCHECK(!thrower->error()); + if (WasmError validation_error = + ValidateFunctions(*native_module, kOnlyLazyFunctions)) { + thrower->CompileFailed(std::move(validation_error)); + return; + } + } + compilation_state->WaitForCompilationEvent( CompilationEvent::kFinishedExportWrappers); - if (compilation_state->failed()) { - DCHECK_IMPLIES(IsLazyModule(wasm_module), !v8_flags.wasm_lazy_validation); - ValidateSequentially(wasm_module, native_module.get(), isolate->counters(), - isolate->allocator(), thrower); - CHECK(thrower->error()); - return; - } - - compilation_state->FinalizeJSToWasmWrappers(isolate, wasm_module); + if (!compilation_state->failed()) { + compilation_state->FinalizeJSToWasmWrappers(isolate, module); - compilation_state->WaitForCompilationEvent( - CompilationEvent::kFinishedBaselineCompilation); + compilation_state->WaitForCompilationEvent( + CompilationEvent::kFinishedBaselineCompilation); - compilation_state->PublishDetectedFeatures(isolate); + compilation_state->PublishDetectedFeatures(isolate); + } if 
(compilation_state->failed()) { - DCHECK_IMPLIES(IsLazyModule(wasm_module), !v8_flags.wasm_lazy_validation); - ValidateSequentially(wasm_module, native_module.get(), isolate->counters(), - isolate->allocator(), thrower); - CHECK(thrower->error()); + DCHECK_IMPLIES(IsLazyModule(module), !v8_flags.wasm_lazy_validation); + WasmError validation_error = + ValidateFunctions(*native_module, kAllFunctions); + CHECK(validation_error.has_error()); + thrower->CompileFailed(std::move(validation_error)); } } +class AsyncCompileJSToWasmWrapperJob final : public JobTask { + public: + explicit AsyncCompileJSToWasmWrapperJob( + std::weak_ptr<NativeModule> native_module) + : native_module_(std::move(native_module)), + engine_barrier_(GetWasmEngine()->GetBarrierForBackgroundCompile()) {} + + void Run(JobDelegate* delegate) override { + auto engine_scope = engine_barrier_->TryLock(); + if (!engine_scope) return; + ExecuteJSToWasmWrapperCompilationUnits(native_module_, delegate); + } + + size_t GetMaxConcurrency(size_t worker_count) const override { + BackgroundCompileScope compile_scope(native_module_); + if (compile_scope.cancelled()) return 0; + size_t flag_limit = static_cast<size_t>( + std::max(1, v8_flags.wasm_num_compilation_tasks.value())); + // NumOutstandingExportWrappers() does not reflect the units that running + // workers are processing, thus add the current worker count to that number. 
+ return std::min( + flag_limit, + worker_count + + compile_scope.compilation_state()->NumOutstandingExportWrappers()); + } + + private: + std::weak_ptr<NativeModule> native_module_; + std::shared_ptr<OperationsBarrier> engine_barrier_; +}; + class BackgroundCompileJob final : public JobTask { public: explicit BackgroundCompileJob(std::weak_ptr<NativeModule> native_module, - std::shared_ptr<Counters> async_counters) + std::shared_ptr<Counters> async_counters, + CompilationTier tier) : native_module_(std::move(native_module)), engine_barrier_(GetWasmEngine()->GetBarrierForBackgroundCompile()), - async_counters_(std::move(async_counters)) {} + async_counters_(std::move(async_counters)), + tier_(tier) {} void Run(JobDelegate* delegate) override { auto engine_scope = engine_barrier_->TryLock(); if (!engine_scope) return; ExecuteCompilationUnits(native_module_, async_counters_.get(), delegate, - kBaselineOrTopTier); + tier_); } size_t GetMaxConcurrency(size_t worker_count) const override { BackgroundCompileScope compile_scope(native_module_); if (compile_scope.cancelled()) return 0; + size_t flag_limit = static_cast<size_t>( + std::max(1, v8_flags.wasm_num_compilation_tasks.value())); // NumOutstandingCompilations() does not reflect the units that running // workers are processing, thus add the current worker count to that number. 
- return std::min( - static_cast<size_t>(v8_flags.wasm_num_compilation_tasks), - worker_count + - compile_scope.compilation_state()->NumOutstandingCompilations()); + return std::min(flag_limit, + worker_count + compile_scope.compilation_state() + ->NumOutstandingCompilations(tier_)); } private: std::weak_ptr<NativeModule> native_module_; std::shared_ptr<OperationsBarrier> engine_barrier_; const std::shared_ptr<Counters> async_counters_; + const CompilationTier tier_; }; } // namespace std::shared_ptr<NativeModule> CompileToNativeModule( - Isolate* isolate, const WasmFeatures& enabled, ErrorThrower* thrower, - std::shared_ptr<const WasmModule> module, const ModuleWireBytes& wire_bytes, + Isolate* isolate, WasmFeatures enabled_features, ErrorThrower* thrower, + std::shared_ptr<const WasmModule> module, ModuleWireBytes wire_bytes, int compilation_id, v8::metrics::Recorder::ContextId context_id, ProfileInformation* pgo_info) { - const WasmModule* wasm_module = module.get(); WasmEngine* engine = GetWasmEngine(); base::OwnedVector<uint8_t> wire_bytes_copy = base::OwnedVector<uint8_t>::Of(wire_bytes.module_bytes()); @@ -1993,20 +1946,20 @@ std::shared_ptr<NativeModule> CompileToNativeModule( // bytes of the temporary key and the new key have the same base pointer and // we can skip the full bytes comparison. 
std::shared_ptr<NativeModule> native_module = engine->MaybeGetNativeModule( - wasm_module->origin, wire_bytes_copy.as_vector(), isolate); + module->origin, wire_bytes_copy.as_vector(), isolate); if (native_module) { - CompileJsToWasmWrappers(isolate, wasm_module); + CompileJsToWasmWrappers(isolate, module.get()); return native_module; } base::Optional<TimedHistogramScope> wasm_compile_module_time_scope; if (base::TimeTicks::IsHighResolution()) { wasm_compile_module_time_scope.emplace(SELECT_WASM_COUNTER( - isolate->counters(), wasm_module->origin, wasm_compile, module_time)); + isolate->counters(), module->origin, wasm_compile, module_time)); } // Embedder usage count for declared shared memories. - if (wasm_module->has_shared_memory) { + if (module->has_shared_memory) { isolate->CountUsage(v8::Isolate::UseCounterFeature::kWasmSharedMemory); } @@ -2017,22 +1970,27 @@ std::shared_ptr<NativeModule> CompileToNativeModule( wasm::WasmCodeManager::EstimateNativeModuleCodeSize( module.get(), include_liftoff, DynamicTiering{v8_flags.wasm_dynamic_tiering.value()}); - native_module = - engine->NewNativeModule(isolate, enabled, module, code_size_estimate); + native_module = engine->NewNativeModule(isolate, enabled_features, module, + code_size_estimate); native_module->SetWireBytes(std::move(wire_bytes_copy)); native_module->compilation_state()->set_compilation_id(compilation_id); - // Sync compilation is user blocking, so we increase the priority. 
- native_module->compilation_state()->SetHighPriority(); - CompileNativeModule(isolate, context_id, thrower, wasm_module, native_module, - pgo_info); - bool cache_hit = !engine->UpdateNativeModuleCache(thrower->error(), - &native_module, isolate); - if (thrower->error()) return {}; + CompileNativeModule(isolate, context_id, thrower, native_module, pgo_info); - if (cache_hit) { - CompileJsToWasmWrappers(isolate, wasm_module); - return native_module; + if (thrower->error()) { + engine->UpdateNativeModuleCache(true, std::move(native_module), isolate); + return {}; + } + + std::shared_ptr<NativeModule> cached_native_module = + engine->UpdateNativeModuleCache(false, native_module, isolate); + + if (cached_native_module != native_module) { + // Do not use {module} or {native_module} any more; use + // {cached_native_module} instead. + module.reset(); + native_module.reset(); + return cached_native_module; } // Ensure that the code objects are logged before returning. @@ -2041,56 +1999,18 @@ std::shared_ptr<NativeModule> CompileToNativeModule( return native_module; } -void RecompileNativeModule(NativeModule* native_module, - TieringState tiering_state) { - // Install a callback to notify us once background recompilation finished. 
- auto recompilation_finished_semaphore = std::make_shared<base::Semaphore>(0); - auto* compilation_state = Impl(native_module->compilation_state()); - - class RecompilationFinishedCallback : public CompilationEventCallback { - public: - explicit RecompilationFinishedCallback( - std::shared_ptr<base::Semaphore> recompilation_finished_semaphore) - : recompilation_finished_semaphore_( - std::move(recompilation_finished_semaphore)) {} - - void call(CompilationEvent event) override { - DCHECK_NE(CompilationEvent::kFailedCompilation, event); - if (event == CompilationEvent::kFinishedRecompilation) { - recompilation_finished_semaphore_->Signal(); - } - } - - private: - std::shared_ptr<base::Semaphore> recompilation_finished_semaphore_; - }; - - // The callback captures a shared ptr to the semaphore. - // Initialize the compilation units and kick off background compile tasks. - compilation_state->InitializeRecompilation( - tiering_state, std::make_unique<RecompilationFinishedCallback>( - recompilation_finished_semaphore)); - - constexpr JobDelegate* kNoDelegate = nullptr; - ExecuteCompilationUnits(compilation_state->native_module_weak(), - compilation_state->counters(), kNoDelegate, - kBaselineOnly); - recompilation_finished_semaphore->Wait(); - DCHECK(!compilation_state->failed()); -} - AsyncCompileJob::AsyncCompileJob( - Isolate* isolate, const WasmFeatures& enabled, - std::unique_ptr<byte[]> bytes_copy, size_t length, Handle<Context> context, + Isolate* isolate, WasmFeatures enabled_features, + base::OwnedVector<const uint8_t> bytes, Handle<Context> context, Handle<Context> incumbent_context, const char* api_method_name, std::shared_ptr<CompilationResultResolver> resolver, int compilation_id) : isolate_(isolate), api_method_name_(api_method_name), - enabled_features_(enabled), + enabled_features_(enabled_features), dynamic_tiering_(DynamicTiering{v8_flags.wasm_dynamic_tiering.value()}), start_time_(base::TimeTicks::Now()), - bytes_copy_(std::move(bytes_copy)), - 
wire_bytes_(bytes_copy_.get(), bytes_copy_.get() + length), + bytes_copy_(std::move(bytes)), + wire_bytes_(bytes_copy_.as_vector()), resolver_(std::move(resolver)), compilation_id_(compilation_id) { TRACE_EVENT0(TRACE_DISABLED_BY_DEFAULT("v8.wasm.detailed"), @@ -2119,13 +2039,124 @@ void AsyncCompileJob::Abort() { GetWasmEngine()->RemoveCompileJob(this); } -class AsyncStreamingProcessor final : public StreamingProcessor { +// {ValidateFunctionsStreamingJobData} holds information that is shared between +// the {AsyncStreamingProcessor} and the {ValidateFunctionsStreamingJob}. It +// lives in the {AsyncStreamingProcessor} and is updated from both classes. +struct ValidateFunctionsStreamingJobData { + struct Unit { + // {func_index == -1} represents an "invalid" unit. + int func_index = -1; + base::Vector<const uint8_t> code; + + // Check whether the unit is valid. + operator bool() const { + DCHECK_LE(-1, func_index); + return func_index >= 0; + } + }; + + void Initialize(int num_declared_functions) { + DCHECK_NULL(units); + units = base::OwnedVector<Unit>::NewForOverwrite(num_declared_functions); + // Initially {next == end}. + next_available_unit.store(units.begin(), std::memory_order_relaxed); + end_of_available_units.store(units.begin(), std::memory_order_relaxed); + } + + void AddUnit(int declared_func_index, base::Vector<const uint8_t> code, + JobHandle* job_handle) { + DCHECK_NOT_NULL(units); + // Write new unit to {*end}, then increment {end}. There is only one thread + // adding new units, so no further synchronization needed. + Unit* ptr = end_of_available_units.load(std::memory_order_relaxed); + // Check invariant: {next <= end}. + DCHECK_LE(next_available_unit.load(std::memory_order_relaxed), ptr); + *ptr++ = {declared_func_index, code}; + // Use release semantics, so whoever loads this pointer (using acquire + // semantics) sees all our previous stores. 
+ end_of_available_units.store(ptr, std::memory_order_release); + size_t total_units_added = ptr - units.begin(); + // Periodically notify concurrency increase. This has overhead, so avoid + // calling it too often. As long as threads are still running they will + // continue processing new units anyway, and if background threads validate + // faster than we can add units, then only notifying after increasingly long + // delays is the right thing to do to avoid too many small validation tasks. + // We notify on each power of two after 16 units, and every 16k units (just + // to have *some* upper limit and avoiding to pile up too many units). + // Additionally, notify after receiving the last unit of the module. + if ((total_units_added >= 16 && + base::bits::IsPowerOfTwo(total_units_added)) || + (total_units_added % (16 * 1024)) == 0 || ptr == units.end()) { + job_handle->NotifyConcurrencyIncrease(); + } + } + + size_t NumOutstandingUnits() const { + Unit* next = next_available_unit.load(std::memory_order_relaxed); + Unit* end = end_of_available_units.load(std::memory_order_relaxed); + DCHECK_LE(next, end); + return end - next; + } + + // Retrieve one unit to validate; returns an "invalid" unit if nothing is in + // the queue. + Unit GetUnit() { + // Use an acquire load to synchronize with the store in {AddUnit}. All units + // before this {end} are fully initialized and ready to execute. + Unit* end = end_of_available_units.load(std::memory_order_acquire); + Unit* next = next_available_unit.load(std::memory_order_relaxed); + while (next < end) { + if (next_available_unit.compare_exchange_weak( + next, next + 1, std::memory_order_relaxed)) { + return *next; + } + // Otherwise retry with updated {next} pointer. 
+ } + return {}; + } + + base::OwnedVector<Unit> units; + std::atomic<Unit*> next_available_unit; + std::atomic<Unit*> end_of_available_units; + std::atomic<bool> found_error{false}; +}; + +class ValidateFunctionsStreamingJob final : public JobTask { public: - explicit AsyncStreamingProcessor(AsyncCompileJob* job, - std::shared_ptr<Counters> counters, - AccountingAllocator* allocator); + ValidateFunctionsStreamingJob(const WasmModule* module, + WasmFeatures enabled_features, + ValidateFunctionsStreamingJobData* data) + : module_(module), enabled_features_(enabled_features), data_(data) {} - ~AsyncStreamingProcessor() override; + void Run(JobDelegate* delegate) override { + TRACE_EVENT0("v8.wasm", "wasm.ValidateFunctionsStreaming"); + using Unit = ValidateFunctionsStreamingJobData::Unit; + while (Unit unit = data_->GetUnit()) { + DecodeResult result = ValidateSingleFunction( + module_, unit.func_index, unit.code, enabled_features_); + + if (result.failed()) { + data_->found_error.store(true, std::memory_order_relaxed); + break; + } + // After validating one function, check if we should yield. 
+ if (delegate->ShouldYield()) break; + } + } + + size_t GetMaxConcurrency(size_t worker_count) const override { + return worker_count + data_->NumOutstandingUnits(); + } + + private: + const WasmModule* const module_; + const WasmFeatures enabled_features_; + ValidateFunctionsStreamingJobData* data_; +}; + +class AsyncStreamingProcessor final : public StreamingProcessor { + public: + explicit AsyncStreamingProcessor(AsyncCompileJob* job); bool ProcessModuleHeader(base::Vector<const uint8_t> bytes, uint32_t offset) override; @@ -2140,14 +2171,13 @@ class AsyncStreamingProcessor final : public StreamingProcessor { int code_section_start, int code_section_length) override; - void ProcessFunctionBody(base::Vector<const uint8_t> bytes, + bool ProcessFunctionBody(base::Vector<const uint8_t> bytes, uint32_t offset) override; void OnFinishedChunk() override; - void OnFinishedStream(base::OwnedVector<uint8_t> bytes) override; - - void OnError(const WasmError&) override; + void OnFinishedStream(base::OwnedVector<const uint8_t> bytes, + bool after_error) override; void OnAbort() override; @@ -2155,11 +2185,6 @@ class AsyncStreamingProcessor final : public StreamingProcessor { base::Vector<const uint8_t> module_bytes) override; private: - enum ErrorLocation { kErrorInFunction, kErrorInSection }; - // Finishes the AsyncCompileJob with an error. 
- void FinishAsyncCompileJobWithError( - const WasmError&, ErrorLocation error_location = kErrorInSection); - void CommitCompilationUnits(); ModuleDecoder decoder_; @@ -2168,20 +2193,19 @@ class AsyncStreamingProcessor final : public StreamingProcessor { int num_functions_ = 0; bool prefix_cache_hit_ = false; bool before_code_section_ = true; - std::shared_ptr<Counters> async_counters_; - AccountingAllocator* allocator_; + ValidateFunctionsStreamingJobData validate_functions_job_data_; + std::unique_ptr<JobHandle> validate_functions_job_handle_; // Running hash of the wire bytes up to code section size, but excluding the // code section itself. Used by the {NativeModuleCache} to detect potential // duplicate modules. - size_t prefix_hash_; + size_t prefix_hash_ = 0; }; std::shared_ptr<StreamingDecoder> AsyncCompileJob::CreateStreamingDecoder() { DCHECK_NULL(stream_); stream_ = StreamingDecoder::CreateAsyncStreamingDecoder( - std::make_unique<AsyncStreamingProcessor>( - this, isolate_->async_counters(), isolate_->allocator())); + std::make_unique<AsyncStreamingProcessor>(this)); return stream_; } @@ -2195,9 +2219,7 @@ AsyncCompileJob::~AsyncCompileJob() { } // Tell the streaming decoder that the AsyncCompileJob is not available // anymore. - // TODO(ahaas): Is this notification really necessary? Check - // https://crbug.com/888170. - if (stream_) stream_->NotifyCompilationEnded(); + if (stream_) stream_->NotifyCompilationDiscarded(); CancelPendingForegroundTask(); isolate_->global_handles()->Destroy(native_context_.location()); isolate_->global_handles()->Destroy(incumbent_context_.location()); @@ -2213,15 +2235,12 @@ void AsyncCompileJob::CreateNativeModule( isolate_->CountUsage(v8::Isolate::UseCounterFeature::kWasmSharedMemory); } - // TODO(wasm): Improve efficiency of storing module wire bytes. Only store - // relevant sections, not function bodies - // Create the module object and populate with compiled functions and // information needed at instantiation time. 
native_module_ = GetWasmEngine()->NewNativeModule( isolate_, enabled_features_, std::move(module), code_size_estimate); - native_module_->SetWireBytes({std::move(bytes_copy_), wire_bytes_.length()}); + native_module_->SetWireBytes(std::move(bytes_copy_)); native_module_->compilation_state()->set_compilation_id(compilation_id_); } @@ -2255,15 +2274,15 @@ void AsyncCompileJob::PrepareRuntimeObjects() { void AsyncCompileJob::FinishCompile(bool is_after_cache_hit) { TRACE_EVENT0(TRACE_DISABLED_BY_DEFAULT("v8.wasm.detailed"), "wasm.FinishAsyncCompile"); + if (stream_) { + stream_->NotifyNativeModuleCreated(native_module_); + } bool is_after_deserialization = !module_object_.is_null(); - auto compilation_state = Impl(native_module_->compilation_state()); if (!is_after_deserialization) { - if (stream_) { - stream_->NotifyNativeModuleCreated(native_module_); - } PrepareRuntimeObjects(); } + auto compilation_state = Impl(native_module_->compilation_state()); // Measure duration of baseline compilation or deserialization from cache. if (base::TimeTicks::IsHighResolution()) { base::TimeDelta duration = base::TimeTicks::Now() - start_time_; @@ -2281,9 +2300,7 @@ void AsyncCompileJob::FinishCompile(bool is_after_cache_hit) { !compilation_state->failed(), // success native_module_->turbofan_code_size(), // code_size_in_bytes native_module_->liftoff_bailout_count(), // liftoff_bailout_count - duration.InMicroseconds(), // wall_clock_duration_in_us - static_cast<int64_t>( // cpu_time_duration_in_us - native_module_->baseline_compilation_cpu_duration())}; + duration.InMicroseconds()}; // wall_clock_duration_in_us isolate_->metrics_recorder()->DelayMainThreadEvent(event, context_id_); } } @@ -2321,51 +2338,37 @@ void AsyncCompileJob::FinishCompile(bool is_after_cache_hit) { // We can only update the feature counts once the entire compile is done. 
compilation_state->PublishDetectedFeatures(isolate_); - // We might need to recompile the module for debugging, if the debugger was - // enabled while streaming compilation was running. Since handling this while - // compiling via streaming is tricky, we just tier down now, before publishing - // the module. - if (native_module_->IsTieredDown()) native_module_->RecompileForTiering(); + // We might need debug code for the module, if the debugger was enabled while + // streaming compilation was running. Since handling this while compiling via + // streaming is tricky, we just remove all code which may have been generated, + // and compile debug code lazily. + if (native_module_->IsInDebugState()) { + native_module_->RemoveCompiledCode( + NativeModule::RemoveFilter::kRemoveNonDebugCode); + } // Finally, log all generated code (it does not matter if this happens // repeatedly in case the script is shared). native_module_->LogWasmCodes(isolate_, module_object_->script()); - FinishModule(); + FinishSuccessfully(); } -void AsyncCompileJob::DecodeFailed(const WasmError& error) { - ErrorThrower thrower(isolate_, api_method_name_); - thrower.CompileFailed(error); +void AsyncCompileJob::Failed() { // {job} keeps the {this} pointer alive. - std::shared_ptr<AsyncCompileJob> job = + std::unique_ptr<AsyncCompileJob> job = GetWasmEngine()->RemoveCompileJob(this); - resolver_->OnCompilationFailed(thrower.Reify()); -} -void AsyncCompileJob::AsyncCompileFailed() { + // Revalidate the whole module to produce a deterministic error message. 
+ constexpr bool kValidate = true; + ModuleResult result = DecodeWasmModule( + enabled_features_, wire_bytes_.module_bytes(), kValidate, kWasmOrigin); + CHECK(result.failed()); ErrorThrower thrower(isolate_, api_method_name_); - DCHECK_EQ(native_module_->module()->origin, kWasmOrigin); - ValidateSequentially(native_module_->module(), native_module_.get(), - isolate_->counters(), isolate_->allocator(), &thrower); - DCHECK(thrower.error()); - // {job} keeps the {this} pointer alive. - std::shared_ptr<AsyncCompileJob> job = - GetWasmEngine()->RemoveCompileJob(this); + thrower.CompileFailed(std::move(result).error()); resolver_->OnCompilationFailed(thrower.Reify()); } -void AsyncCompileJob::AsyncCompileSucceeded(Handle<WasmModuleObject> result) { - TRACE_EVENT0(TRACE_DISABLED_BY_DEFAULT("v8.wasm.detailed"), - "wasm.OnCompilationSucceeded"); - // We have to make sure that an "incumbent context" is available in case - // the module's start function calls out to Blink. - Local<v8::Context> backup_incumbent_context = - Utils::ToLocal(incumbent_context_); - v8::Context::BackupIncumbentScope incumbent(backup_incumbent_context); - resolver_->OnCompilationSucceeded(result); -} - class AsyncCompileJob::CompilationStateCallback : public CompilationEventCallback { public: @@ -2385,12 +2388,14 @@ class AsyncCompileJob::CompilationStateCallback // Install the native module in the cache, or reuse a conflicting one. // If we get a conflicting module, wait until we are back in the // main thread to update {job_->native_module_} to avoid a data race. - std::shared_ptr<NativeModule> native_module = job_->native_module_; - bool cache_hit = !GetWasmEngine()->UpdateNativeModuleCache( - false, &native_module, job_->isolate_); - DCHECK_EQ(cache_hit, native_module != job_->native_module_); - job_->DoSync<CompileFinished>(cache_hit ? 
std::move(native_module) - : nullptr); + std::shared_ptr<NativeModule> cached_native_module = + GetWasmEngine()->UpdateNativeModuleCache( + false, job_->native_module_, job_->isolate_); + if (cached_native_module == job_->native_module_) { + // There was no cached module. + cached_native_module = nullptr; + } + job_->DoSync<FinishCompilation>(std::move(cached_native_module)); } break; case CompilationEvent::kFinishedCompilationChunk: @@ -2403,16 +2408,11 @@ class AsyncCompileJob::CompilationStateCallback if (job_->DecrementAndCheckFinisherCount(kCompilation)) { // Don't update {job_->native_module_} to avoid data races with other // compilation threads. Use a copy of the shared pointer instead. - std::shared_ptr<NativeModule> native_module = job_->native_module_; - GetWasmEngine()->UpdateNativeModuleCache(true, &native_module, + GetWasmEngine()->UpdateNativeModuleCache(true, job_->native_module_, job_->isolate_); - job_->DoSync<CompileFailed>(); + job_->DoSync<Fail>(); } break; - case CompilationEvent::kFinishedRecompilation: - // This event can happen out of order, hence don't remember this in - // {last_event_}. - return; } #ifdef DEBUG last_event_ = event; @@ -2549,38 +2549,6 @@ void AsyncCompileJob::NextStep(Args&&... args) { step_.reset(new Step(std::forward<Args>(args)...)); } -WasmError ValidateLazilyCompiledFunctions(const WasmModule* module, - ModuleWireBytes wire_bytes, - WasmFeatures enabled_features) { - if (v8_flags.wasm_lazy_validation) return {}; - if (!MayCompriseLazyFunctions(module, enabled_features)) return {}; - - auto allocator = GetWasmEngine()->allocator(); - - // TODO(clemensb): Parallelize this. 
- const bool is_lazy_module = IsLazyModule(module); - for (const WasmFunction& function : module->declared_functions()) { - if (module->function_was_validated(function.func_index)) continue; - base::Vector<const uint8_t> code = wire_bytes.GetFunctionBytes(&function); - - CompileStrategy strategy = GetCompileStrategy( - module, enabled_features, function.func_index, is_lazy_module); - if (strategy != CompileStrategy::kLazy && - strategy != CompileStrategy::kLazyBaselineEagerTopTier) { - continue; - } - DecodeResult function_result = ValidateSingleFunction( - module, function.func_index, code, allocator, enabled_features); - if (function_result.failed()) { - WasmError error = std::move(function_result).error(); - return GetWasmErrorWithName(wire_bytes, &function, module, - std::move(error)); - } - module->set_function_validated(function.func_index); - } - return {}; -} - //========================================================================== // Step 1: (async) Decode the module. //========================================================================== @@ -2600,24 +2568,23 @@ class AsyncCompileJob::DecodeModule : public AsyncCompileJob::CompileStep { TRACE_EVENT0(TRACE_DISABLED_BY_DEFAULT("v8.wasm.detailed"), "wasm.DecodeModule"); auto enabled_features = job->enabled_features_; - result = DecodeWasmModule( - enabled_features, job->wire_bytes_.start(), job->wire_bytes_.end(), - false, kWasmOrigin, counters_, metrics_recorder_, job->context_id(), - DecodingMethod::kAsync, GetWasmEngine()->allocator()); + result = + DecodeWasmModule(enabled_features, job->wire_bytes_.module_bytes(), + false, kWasmOrigin, counters_, metrics_recorder_, + job->context_id(), DecodingMethod::kAsync); // Validate lazy functions here if requested. 
- if (result.ok()) { + if (result.ok() && !v8_flags.wasm_lazy_validation) { const WasmModule* module = result.value().get(); - WasmError validation_error = ValidateLazilyCompiledFunctions( - module, job->wire_bytes_, job->enabled_features_); - if (validation_error.has_error()) { + if (WasmError validation_error = + ValidateFunctions(module, job->wire_bytes_.module_bytes(), + job->enabled_features_, kOnlyLazyFunctions)) result = ModuleResult{std::move(validation_error)}; - } } } if (result.failed()) { // Decoding failure; reject the promise and clean up. - job->DoSync<DecodeFail>(std::move(result).error()); + job->DoSync<Fail>(); } else { // Decode passed. std::shared_ptr<WasmModule> module = std::move(result).value(); @@ -2636,24 +2603,7 @@ class AsyncCompileJob::DecodeModule : public AsyncCompileJob::CompileStep { }; //========================================================================== -// Step 1b: (sync) Fail decoding the module. -//========================================================================== -class AsyncCompileJob::DecodeFail : public CompileStep { - public: - explicit DecodeFail(WasmError error) : error_(std::move(error)) {} - - private: - WasmError error_; - - void RunInForeground(AsyncCompileJob* job) override { - TRACE_COMPILE("(1b) Decoding failed.\n"); - // {job_} is deleted in DecodeFailed, therefore the {return}. - return job->DecodeFailed(error_); - } -}; - -//========================================================================== -// Step 2 (sync): Create heap-allocated data and start compile. +// Step 2 (sync): Create heap-allocated data and start compilation. //========================================================================== class AsyncCompileJob::PrepareAndStartCompile : public CompileStep { public: @@ -2685,13 +2635,10 @@ class AsyncCompileJob::PrepareAndStartCompile : public CompileStep { // Note that we only need to validate lazily compiled functions, others // will be validated during eager compilation. 
DCHECK(start_compilation_); - if (ValidateLazilyCompiledFunctions( - module_.get(), ModuleWireBytes{job->native_module_->wire_bytes()}, - job->native_module_->enabled_features()) + if (!v8_flags.wasm_lazy_validation && + ValidateFunctions(*job->native_module_, kOnlyLazyFunctions) .has_error()) { - // TODO(clemensb): Use the error message instead of re-validation in - // {AsyncCompileFailed}. - job->AsyncCompileFailed(); + job->Failed(); return; } } @@ -2735,37 +2682,18 @@ class AsyncCompileJob::PrepareAndStartCompile : public CompileStep { }; //========================================================================== -// Step 3a (sync): Compilation failed. -//========================================================================== -class AsyncCompileJob::CompileFailed : public CompileStep { - private: - void RunInForeground(AsyncCompileJob* job) override { - TRACE_COMPILE("(3a) Compilation failed\n"); - DCHECK(job->native_module_->compilation_state()->failed()); - - // {job_} is deleted in AsyncCompileFailed, therefore the {return}. - return job->AsyncCompileFailed(); - } -}; - -//========================================================================== -// Step 3b (sync): Compilation finished. +// Step 3 (sync): Compilation finished. 
//========================================================================== -class AsyncCompileJob::CompileFinished : public CompileStep { +class AsyncCompileJob::FinishCompilation : public CompileStep { public: - explicit CompileFinished(std::shared_ptr<NativeModule> cached_native_module) + explicit FinishCompilation(std::shared_ptr<NativeModule> cached_native_module) : cached_native_module_(std::move(cached_native_module)) {} private: void RunInForeground(AsyncCompileJob* job) override { - TRACE_COMPILE("(3b) Compilation finished\n"); + TRACE_COMPILE("(3) Compilation finished\n"); if (cached_native_module_) { job->native_module_ = cached_native_module_; - } else { - DCHECK(!job->native_module_->compilation_state()->failed()); - // Sample the generated code size when baseline compilation finished. - job->native_module_->SampleCodeSize(job->isolate_->counters(), - NativeModule::kAfterBaseline); } // Then finalize and publish the generated module. job->FinishCompile(cached_native_module_ != nullptr); @@ -2774,80 +2702,44 @@ class AsyncCompileJob::CompileFinished : public CompileStep { std::shared_ptr<NativeModule> cached_native_module_; }; -void AsyncCompileJob::FinishModule() { +//========================================================================== +// Step 4 (sync): Decoding or compilation failed. +//========================================================================== +class AsyncCompileJob::Fail : public CompileStep { + private: + void RunInForeground(AsyncCompileJob* job) override { + TRACE_COMPILE("(4) Async compilation failed.\n"); + // {job_} is deleted in {Failed}, therefore the {return}. 
+ return job->Failed(); + } +}; + +void AsyncCompileJob::FinishSuccessfully() { TRACE_COMPILE("(4) Finish module...\n"); - AsyncCompileSucceeded(module_object_); + { + TRACE_EVENT0(TRACE_DISABLED_BY_DEFAULT("v8.wasm.detailed"), + "wasm.OnCompilationSucceeded"); + // We have to make sure that an "incumbent context" is available in case + // the module's start function calls out to Blink. + Local<v8::Context> backup_incumbent_context = + Utils::ToLocal(incumbent_context_); + v8::Context::BackupIncumbentScope incumbent(backup_incumbent_context); + resolver_->OnCompilationSucceeded(module_object_); + } GetWasmEngine()->RemoveCompileJob(this); } -AsyncStreamingProcessor::AsyncStreamingProcessor( - AsyncCompileJob* job, std::shared_ptr<Counters> async_counters, - AccountingAllocator* allocator) +AsyncStreamingProcessor::AsyncStreamingProcessor(AsyncCompileJob* job) : decoder_(job->enabled_features_), job_(job), - compilation_unit_builder_(nullptr), - async_counters_(async_counters), - allocator_(allocator) {} - -AsyncStreamingProcessor::~AsyncStreamingProcessor() { - if (job_->native_module_ && job_->native_module_->wire_bytes().empty()) { - // Clean up the temporary cache entry. - GetWasmEngine()->StreamingCompilationFailed(prefix_hash_); - } -} - -void AsyncStreamingProcessor::FinishAsyncCompileJobWithError( - const WasmError& error, ErrorLocation error_location) { - DCHECK(error.has_error()); - // Make sure all background tasks stopped executing before we change the state - // of the AsyncCompileJob to DecodeFail. - job_->background_task_manager_.CancelAndWait(); - - // Record event metrics. 
- auto duration = base::TimeTicks::Now() - job_->start_time_; - job_->metrics_event_.success = false; - job_->metrics_event_.streamed = true; - job_->metrics_event_.module_size_in_bytes = job_->wire_bytes_.length(); - job_->metrics_event_.function_count = num_functions_; - job_->metrics_event_.wall_clock_duration_in_us = duration.InMicroseconds(); - job_->isolate_->metrics_recorder()->DelayMainThreadEvent(job_->metrics_event_, - job_->context_id_); - - // Check if there is already a CompiledModule, in which case we have to clean - // up the CompilationStateImpl as well. - if (job_->native_module_) { - CompilationStateImpl* impl = - Impl(job_->native_module_->compilation_state()); - - if (error_location == kErrorInFunction) { - impl->SetError(); - } - impl->CancelCompilation(CompilationStateImpl::kCancelUnconditionally); - if (error_location == kErrorInSection) { - job_->DoSync<AsyncCompileJob::DecodeFail, - AsyncCompileJob::kUseExistingForegroundTask>(error); - } - // Clear the {compilation_unit_builder_} if it exists. This is needed - // because there is a check in the destructor of the - // {CompilationUnitBuilder} that it is empty. - if (compilation_unit_builder_) compilation_unit_builder_->Clear(); - } else { - job_->DoSync<AsyncCompileJob::DecodeFail>(error); - } -} + compilation_unit_builder_(nullptr) {} // Process the module header. 
bool AsyncStreamingProcessor::ProcessModuleHeader(
    base::Vector<const uint8_t> bytes, uint32_t offset) {
  TRACE_STREAMING("Process module header...\n");
-  decoder_.StartDecoding(job_->isolate()->counters(),
-                         job_->isolate()->metrics_recorder(),
-                         job_->context_id(), GetWasmEngine()->allocator());
  decoder_.DecodeModuleHeader(bytes, offset);
-  if (!decoder_.ok()) {
-    FinishAsyncCompileJobWithError(decoder_.FinishDecoding().error());
-    return false;
-  }
+  if (!decoder_.ok()) return false;
  prefix_hash_ = GetWireBytesHash(bytes);
  return true;
}
@@ -2870,10 +2762,7 @@ bool AsyncStreamingProcessor::ProcessSection(SectionCode section_code,
  if (section_code == SectionCode::kUnknownSectionCode) {
    size_t bytes_consumed = ModuleDecoder::IdentifyUnknownSection(
        &decoder_, bytes, offset, &section_code);
-    if (!decoder_.ok()) {
-      FinishAsyncCompileJobWithError(decoder_.FinishDecoding().error());
-      return false;
-    }
+    if (!decoder_.ok()) return false;
    if (section_code == SectionCode::kUnknownSectionCode) {
      // Skip unknown sections that we do not know how to handle.
      return true;
    }
@@ -2883,11 +2772,7 @@
    bytes = bytes.SubVector(bytes_consumed, bytes.size());
  }
  decoder_.DecodeSection(section_code, bytes, offset);
-  if (!decoder_.ok()) {
-    FinishAsyncCompileJobWithError(decoder_.FinishDecoding().error());
-    return false;
-  }
-  return true;
+  return decoder_.ok();
}

// Start the code section.
@@ -2903,7 -2788,6 @@ bool AsyncStreamingProcessor::ProcessCodeSectionHeader(
                         static_cast<uint32_t>(code_section_length));
  if (!decoder_.CheckFunctionsCount(static_cast<uint32_t>(num_functions),
                                    functions_mismatch_error_offset)) {
-    FinishAsyncCompileJobWithError(decoder_.FinishDecoding().error());
    return false;
  }
@@ -2944,7 +2828,7 @@
}

// Process a function body.
-void AsyncStreamingProcessor::ProcessFunctionBody( +bool AsyncStreamingProcessor::ProcessFunctionBody( base::Vector<const uint8_t> bytes, uint32_t offset) { TRACE_STREAMING("Process function body %d ...\n", num_functions_); uint32_t func_index = @@ -2957,15 +2841,7 @@ void AsyncStreamingProcessor::ProcessFunctionBody( if (prefix_cache_hit_) { // Don't compile yet if we might have a cache hit. - return; - } - - // Bail out after the {prefix_cache_hit_}, because if {prefix_cache_hit_} is - // true, the native module does not exist. - if (job_->native_module_->compilation_state()->failed()) { - // There has already been an error, there is no need to do any more - // validation or compiling. - return; + return true; } const WasmModule* module = decoder_.module(); @@ -2979,20 +2855,24 @@ void AsyncStreamingProcessor::ProcessFunctionBody( (strategy == CompileStrategy::kLazy || strategy == CompileStrategy::kLazyBaselineEagerTopTier); if (validate_lazily_compiled_function) { - // The native module does not own the wire bytes until {SetWireBytes} is - // called in {OnFinishedStream}. Validation must use {bytes} parameter. - DecodeResult result = ValidateSingleFunction(module, func_index, bytes, - allocator_, enabled_features); - - if (result.failed()) { - FinishAsyncCompileJobWithError(result.error(), kErrorInFunction); - return; + // {bytes} is part of a section buffer owned by the streaming decoder. The + // streaming decoder is held alive by the {AsyncCompileJob}, so we can just + // use the {bytes} vector as long as the {AsyncCompileJob} is still running. 
+ if (!validate_functions_job_handle_) { + validate_functions_job_data_.Initialize(module->num_declared_functions); + validate_functions_job_handle_ = V8::GetCurrentPlatform()->CreateJob( + TaskPriority::kUserVisible, + std::make_unique<ValidateFunctionsStreamingJob>( + module, enabled_features, &validate_functions_job_data_)); } + validate_functions_job_data_.AddUnit(func_index, bytes, + validate_functions_job_handle_.get()); } auto* compilation_state = Impl(job_->native_module_->compilation_state()); compilation_state->AddCompilationUnit(compilation_unit_builder_.get(), func_index); + return true; } void AsyncStreamingProcessor::CommitCompilationUnits() { @@ -3007,21 +2887,27 @@ void AsyncStreamingProcessor::OnFinishedChunk() { // Finish the processing of the stream. void AsyncStreamingProcessor::OnFinishedStream( - base::OwnedVector<uint8_t> bytes) { + base::OwnedVector<const uint8_t> bytes, bool after_error) { TRACE_STREAMING("Finish stream...\n"); - DCHECK_EQ(NativeModuleCache::PrefixHash(bytes.as_vector()), prefix_hash_); - ModuleResult result = decoder_.FinishDecoding(); - if (result.failed()) { - FinishAsyncCompileJobWithError(result.error()); - return; + ModuleResult module_result = decoder_.FinishDecoding(); + if (module_result.failed()) after_error = true; + + if (validate_functions_job_handle_) { + // Wait for background validation to finish, then check if a validation + // error was found. + // TODO(13447): Do not block here; register validation as another finisher + // instead. + validate_functions_job_handle_->Join(); + validate_functions_job_handle_.reset(); + if (validate_functions_job_data_.found_error) after_error = true; } job_->wire_bytes_ = ModuleWireBytes(bytes.as_vector()); - job_->bytes_copy_ = bytes.ReleaseData(); + job_->bytes_copy_ = std::move(bytes); // Record event metrics. 
auto duration = base::TimeTicks::Now() - job_->start_time_; - job_->metrics_event_.success = true; + job_->metrics_event_.success = !after_error; job_->metrics_event_.streamed = true; job_->metrics_event_.module_size_in_bytes = job_->wire_bytes_.length(); job_->metrics_event_.function_count = num_functions_; @@ -3029,15 +2915,39 @@ void AsyncStreamingProcessor::OnFinishedStream( job_->isolate_->metrics_recorder()->DelayMainThreadEvent(job_->metrics_event_, job_->context_id_); + if (after_error) { + if (job_->native_module_ && job_->native_module_->wire_bytes().empty()) { + // Clean up the temporary cache entry. + GetWasmEngine()->StreamingCompilationFailed(prefix_hash_); + } + // Calling {Failed} will invalidate the {AsyncCompileJob} and delete {this}. + job_->Failed(); + return; + } + + std::shared_ptr<WasmModule> module = std::move(module_result).value(); + + // At this point we identified the module as valid (except maybe for function + // bodies, if lazy validation is enabled). + // This DCHECK could be considered slow, but it only happens once per async + // module compilation, and we only re-decode the module structure, without + // validation function bodies. Overall this does not add a lot of overhead. + DCHECK(DecodeWasmModule(job_->enabled_features_, + job_->bytes_copy_.as_vector(), + /* validate functions */ false, kWasmOrigin) + .ok()); + + DCHECK_EQ(NativeModuleCache::PrefixHash(job_->wire_bytes_.module_bytes()), + prefix_hash_); if (prefix_cache_hit_) { // Restart as an asynchronous, non-streaming compilation. Most likely // {PrepareAndStartCompile} will get the native module from the cache. 
const bool include_liftoff = v8_flags.liftoff; size_t code_size_estimate = wasm::WasmCodeManager::EstimateNativeModuleCodeSize( - result.value().get(), include_liftoff, job_->dynamic_tiering_); + module.get(), include_liftoff, job_->dynamic_tiering_); job_->DoSync<AsyncCompileJob::PrepareAndStartCompile>( - std::move(result).value(), true, code_size_estimate); + std::move(module), true, code_size_estimate); return; } @@ -3047,10 +2957,15 @@ void AsyncStreamingProcessor::OnFinishedStream( HandleScope scope(job_->isolate_); SaveAndSwitchContext saved_context(job_->isolate_, *job_->native_context_); - // Record the size of the wire bytes. In synchronous and asynchronous - // (non-streaming) compilation, this happens in {DecodeWasmModule}. - auto* histogram = job_->isolate_->counters()->wasm_wasm_module_size_bytes(); - histogram->AddSample(job_->wire_bytes_.module_bytes().length()); + // Record the size of the wire bytes and the number of functions. In + // synchronous and asynchronous (non-streaming) compilation, this happens in + // {DecodeWasmModule}. + auto* module_size_histogram = + job_->isolate_->counters()->wasm_wasm_module_size_bytes(); + module_size_histogram->AddSample(job_->wire_bytes_.module_bytes().length()); + auto* num_functions_histogram = + job_->isolate_->counters()->wasm_functions_per_wasm_module(); + num_functions_histogram->AddSample(static_cast<int>(num_functions_)); const bool has_code_section = job_->native_module_ != nullptr; bool cache_hit = false; @@ -3059,11 +2974,10 @@ void AsyncStreamingProcessor::OnFinishedStream( // native module now (would otherwise happen in {PrepareAndStartCompile} or // {ProcessCodeSectionHeader}). 
constexpr size_t kCodeSizeEstimate = 0; - cache_hit = job_->GetOrCreateNativeModule(std::move(result).value(), - kCodeSizeEstimate); + cache_hit = + job_->GetOrCreateNativeModule(std::move(module), kCodeSizeEstimate); } else { - job_->native_module_->SetWireBytes( - {std::move(job_->bytes_copy_), job_->wire_bytes_.length()}); + job_->native_module_->SetWireBytes(std::move(job_->bytes_copy_)); } const bool needs_finish = job_->DecrementAndCheckFinisherCount(AsyncCompileJob::kStreamingDecoder); @@ -3071,25 +2985,32 @@ void AsyncStreamingProcessor::OnFinishedStream( if (needs_finish) { const bool failed = job_->native_module_->compilation_state()->failed(); if (!cache_hit) { - cache_hit = !GetWasmEngine()->UpdateNativeModuleCache( - failed, &job_->native_module_, job_->isolate_); + auto* prev_native_module = job_->native_module_.get(); + job_->native_module_ = GetWasmEngine()->UpdateNativeModuleCache( + failed, std::move(job_->native_module_), job_->isolate_); + cache_hit = prev_native_module != job_->native_module_.get(); } + // We finally call {Failed} or {FinishCompile}, which will invalidate the + // {AsyncCompileJob} and delete {this}. if (failed) { - job_->AsyncCompileFailed(); + job_->Failed(); } else { job_->FinishCompile(cache_hit); } } } -// Report an error detected in the StreamingDecoder. -void AsyncStreamingProcessor::OnError(const WasmError& error) { - TRACE_STREAMING("Stream error...\n"); - FinishAsyncCompileJobWithError(error); -} - void AsyncStreamingProcessor::OnAbort() { TRACE_STREAMING("Abort stream...\n"); + if (validate_functions_job_handle_) { + validate_functions_job_handle_->Cancel(); + validate_functions_job_handle_.reset(); + } + if (job_->native_module_ && job_->native_module_->wire_bytes().empty()) { + // Clean up the temporary cache entry. + GetWasmEngine()->StreamingCompilationFailed(prefix_hash_); + } + // {Abort} invalidates the {AsyncCompileJob}, which in turn deletes {this}. 
job_->Abort(); } @@ -3117,6 +3038,7 @@ bool AsyncStreamingProcessor::Deserialize( job_->isolate_->global_handles()->Create(*result.ToHandleChecked()); job_->native_module_ = job_->module_object_->shared_native_module(); job_->wire_bytes_ = ModuleWireBytes(job_->native_module_->wire_bytes()); + // Calling {FinishCompile} deletes the {AsyncCompileJob} and {this}. job_->FinishCompile(false); return true; } @@ -3131,12 +3053,22 @@ CompilationStateImpl::CompilationStateImpl( dynamic_tiering_(dynamic_tiering) {} void CompilationStateImpl::InitCompileJob() { - DCHECK_NULL(compile_job_); + DCHECK_NULL(js_to_wasm_wrapper_job_); + DCHECK_NULL(baseline_compile_job_); + DCHECK_NULL(top_tier_compile_job_); // Create the job, but don't spawn workers yet. This will happen on // {NotifyConcurrencyIncrease}. - compile_job_ = V8::GetCurrentPlatform()->CreateJob( - TaskPriority::kUserVisible, std::make_unique<BackgroundCompileJob>( - native_module_weak_, async_counters_)); + js_to_wasm_wrapper_job_ = V8::GetCurrentPlatform()->CreateJob( + TaskPriority::kUserBlocking, + std::make_unique<AsyncCompileJSToWasmWrapperJob>(native_module_weak_)); + baseline_compile_job_ = V8::GetCurrentPlatform()->CreateJob( + TaskPriority::kUserVisible, + std::make_unique<BackgroundCompileJob>( + native_module_weak_, async_counters_, CompilationTier::kBaseline)); + top_tier_compile_job_ = V8::GetCurrentPlatform()->CreateJob( + TaskPriority::kUserVisible, + std::make_unique<BackgroundCompileJob>( + native_module_weak_, async_counters_, CompilationTier::kTopTier)); } void CompilationStateImpl::CancelCompilation( @@ -3256,7 +3188,8 @@ void CompilationStateImpl::InitializeCompilationProgress( // Compute the default compilation progress for all functions, and set it. 
const ExecutionTierPair default_tiers = GetDefaultTiersPerModule( - native_module_, dynamic_tiering_, IsLazyModule(module)); + native_module_, dynamic_tiering_, native_module_->IsInDebugState(), + IsLazyModule(module)); const uint8_t default_progress = RequiredBaselineTierField::encode(default_tiers.baseline_tier) | RequiredTopTierField::encode(default_tiers.top_tier) | @@ -3289,7 +3222,7 @@ void CompilationStateImpl::InitializeCompilationProgress( TriggerCallbacks(); } -uint8_t CompilationStateImpl::AddCompilationUnitInternal( +void CompilationStateImpl::AddCompilationUnitInternal( CompilationUnitBuilder* builder, int function_index, uint8_t function_progress) { ExecutionTier required_baseline_tier = @@ -3300,26 +3233,6 @@ uint8_t CompilationStateImpl::AddCompilationUnitInternal( ExecutionTier reached_tier = CompilationStateImpl::ReachedTierField::decode(function_progress); - if (v8_flags.experimental_wasm_gc && !v8_flags.wasm_lazy_compilation) { - // The Turbofan optimizations we enable for WasmGC code can (for now) - // take a very long time, so skip Turbofan compilation for super-large - // functions. - // Besides, module serialization currently requires that all functions - // have been TF-compiled. By enabling this limit only for WasmGC, we - // make sure that non-experimental modules can be serialize as usual. - // TODO(jkummerow): This is a stop-gap solution to avoid excessive - // compile times. We would like to replace this hard threshold with - // a better solution (TBD) eventually. 
- constexpr uint32_t kMaxWasmFunctionSizeForTurbofan = 500 * KB; - uint32_t size = builder->module()->functions[function_index].code.length(); - if (size > kMaxWasmFunctionSizeForTurbofan) { - required_baseline_tier = ExecutionTier::kLiftoff; - if (required_top_tier == ExecutionTier::kTurbofan) { - required_top_tier = ExecutionTier::kLiftoff; - } - } - } - if (reached_tier < required_baseline_tier) { builder->AddBaselineUnit(function_index, required_baseline_tier); } @@ -3327,28 +3240,18 @@ uint8_t CompilationStateImpl::AddCompilationUnitInternal( required_baseline_tier != required_top_tier) { builder->AddTopTierUnit(function_index, required_top_tier); } - return CompilationStateImpl::RequiredBaselineTierField::encode( - required_baseline_tier) | - CompilationStateImpl::RequiredTopTierField::encode(required_top_tier) | - CompilationStateImpl::ReachedTierField::encode(reached_tier); } void CompilationStateImpl::InitializeCompilationUnits( std::unique_ptr<CompilationUnitBuilder> builder) { int offset = native_module_->module()->num_imported_functions; - if (native_module_->IsTieredDown()) { - for (size_t i = 0; i < compilation_progress_.size(); ++i) { - int func_index = offset + static_cast<int>(i); - builder->AddDebugUnit(func_index); - } - } else { + { base::MutexGuard guard(&callbacks_mutex_); - for (size_t i = 0; i < compilation_progress_.size(); ++i) { + for (size_t i = 0, e = compilation_progress_.size(); i < e; ++i) { uint8_t function_progress = compilation_progress_[i]; int func_index = offset + static_cast<int>(i); - compilation_progress_[i] = AddCompilationUnitInternal( - builder.get(), func_index, function_progress); + AddCompilationUnitInternal(builder.get(), func_index, function_progress); } } builder->Commit(); @@ -3356,10 +3259,6 @@ void CompilationStateImpl::InitializeCompilationUnits( void CompilationStateImpl::AddCompilationUnit(CompilationUnitBuilder* builder, int func_index) { - if (native_module_->IsTieredDown()) { - 
builder->AddDebugUnit(func_index); - return; - } int offset = native_module_->module()->num_imported_functions; int progress_index = func_index - offset; uint8_t function_progress; @@ -3373,14 +3272,7 @@ void CompilationStateImpl::AddCompilationUnit(CompilationUnitBuilder* builder, base::MutexGuard guard(&callbacks_mutex_); function_progress = compilation_progress_[progress_index]; } - uint8_t updated_function_progress = - AddCompilationUnitInternal(builder, func_index, function_progress); - if (updated_function_progress != function_progress) { - // This should happen very rarely (only for super-large functions), so we're - // not worried about overhead. - base::MutexGuard guard(&callbacks_mutex_); - compilation_progress_[progress_index] = updated_function_progress; - } + AddCompilationUnitInternal(builder, func_index, function_progress); } void CompilationStateImpl::InitializeCompilationProgressAfterDeserialization( @@ -3419,8 +3311,6 @@ void CompilationStateImpl::InitializeCompilationProgressAfterDeserialization( RequiredTopTierField::encode(ExecutionTier::kNone) | ReachedTierField::encode(ExecutionTier::kNone); for (auto func_index : lazy_functions) { - native_module_->UseLazyStub(func_index); - compilation_progress_[declared_function_index(module, func_index)] = kProgressForLazyFunctions; } @@ -3428,7 +3318,8 @@ void CompilationStateImpl::InitializeCompilationProgressAfterDeserialization( // Update compilation state for eagerly compiled functions. 
constexpr bool kNotLazy = false; ExecutionTierPair default_tiers = - GetDefaultTiersPerModule(native_module_, dynamic_tiering_, kNotLazy); + GetDefaultTiersPerModule(native_module_, dynamic_tiering_, + native_module_->IsInDebugState(), kNotLazy); uint8_t progress_for_eager_functions = RequiredBaselineTierField::encode(default_tiers.baseline_tier) | RequiredTopTierField::encode(default_tiers.top_tier) | @@ -3448,7 +3339,7 @@ void CompilationStateImpl::InitializeCompilationProgressAfterDeserialization( // that as finished already. Baseline compilation is done if we do not have // any Liftoff functions to compile. finished_events_.Add(CompilationEvent::kFinishedExportWrappers); - if (eager_functions.empty()) { + if (eager_functions.empty() || v8_flags.wasm_lazy_compilation) { finished_events_.Add(CompilationEvent::kFinishedBaselineCompilation); } } @@ -3457,87 +3348,6 @@ void CompilationStateImpl::InitializeCompilationProgressAfterDeserialization( WaitForCompilationEvent(CompilationEvent::kFinishedBaselineCompilation); } -void CompilationStateImpl::InitializeRecompilation( - TieringState new_tiering_state, - std::unique_ptr<CompilationEventCallback> recompilation_finished_callback) { - DCHECK(!failed()); - - // Hold the mutex as long as possible, to synchronize between multiple - // recompilations that are triggered at the same time (e.g. when the profiler - // is disabled). - base::Optional<base::MutexGuard> guard(&callbacks_mutex_); - - // As long as there are outstanding recompilation functions, take part in - // compilation. This is to avoid recompiling for the same tier or for - // different tiers concurrently. Note that the compilation unit queues can run - // empty before {outstanding_recompilation_functions_} drops to zero. In this - // case, we do not wait for the last running compilation threads to finish - // their units, but just start our own recompilation already. 
- while (outstanding_recompilation_functions_ > 0 && - compilation_unit_queues_.GetTotalSize() > 0) { - guard.reset(); - constexpr JobDelegate* kNoDelegate = nullptr; - ExecuteCompilationUnits(native_module_weak_, async_counters_.get(), - kNoDelegate, kBaselineOrTopTier); - guard.emplace(&callbacks_mutex_); - } - - // Information about compilation progress is shared between this class and the - // NativeModule. Before updating information here, consult the NativeModule to - // find all functions that need recompilation. - // Since the current tiering state is updated on the NativeModule before - // triggering recompilation, it's OK if the information is slightly outdated. - // If we compile functions twice, the NativeModule will ignore all redundant - // code (or code compiled for the wrong tier). - std::vector<int> recompile_function_indexes = - native_module_->FindFunctionsToRecompile(new_tiering_state); - - callbacks_.emplace_back(std::move(recompilation_finished_callback)); - tiering_state_ = new_tiering_state; - - // If compilation progress is not initialized yet, then compilation didn't - // start yet, and new code will be kept tiered-down from the start. For - // streaming compilation, there is a special path to tier down later, when - // the module is complete. In any case, we don't need to recompile here. - base::Optional<CompilationUnitBuilder> builder; - if (compilation_progress_.size() > 0) { - builder.emplace(native_module_); - const WasmModule* module = native_module_->module(); - DCHECK_EQ(module->num_declared_functions, compilation_progress_.size()); - DCHECK_GE(module->num_declared_functions, - recompile_function_indexes.size()); - outstanding_recompilation_functions_ = - static_cast<int>(recompile_function_indexes.size()); - // Restart recompilation if another recompilation is already happening. 
- for (auto& progress : compilation_progress_) { - progress = MissingRecompilationField::update(progress, false); - } - auto new_tier = new_tiering_state == kTieredDown ? ExecutionTier::kLiftoff - : ExecutionTier::kTurbofan; - int imported = module->num_imported_functions; - // Generate necessary compilation units on the fly. - for (int function_index : recompile_function_indexes) { - DCHECK_LE(imported, function_index); - int slot_index = function_index - imported; - auto& progress = compilation_progress_[slot_index]; - progress = MissingRecompilationField::update(progress, true); - builder->AddRecompilationUnit(function_index, new_tier); - } - } - - // Trigger callback if module needs no recompilation. - if (outstanding_recompilation_functions_ == 0) { - TriggerCallbacks(base::EnumSet<CompilationEvent>( - {CompilationEvent::kFinishedRecompilation})); - } - - if (builder.has_value()) { - // Avoid holding lock while scheduling a compile job. - guard.reset(); - builder->Commit(); - } -} - void CompilationStateImpl::AddCallback( std::unique_ptr<CompilationEventCallback> callback) { base::MutexGuard callbacks_guard(&callbacks_mutex_); @@ -3571,13 +3381,22 @@ void CompilationStateImpl::CommitCompilationUnits( // are available to other threads doing an acquire load. 
outstanding_js_to_wasm_wrappers_.store(js_to_wasm_wrapper_units.size(), std::memory_order_release); + DCHECK(js_to_wasm_wrapper_job_->IsValid()); + js_to_wasm_wrapper_job_->NotifyConcurrencyIncrease(); } if (!baseline_units.empty() || !top_tier_units.empty()) { compilation_unit_queues_.AddUnits(baseline_units, top_tier_units, native_module_->module()); } ResetPKUPermissionsForThreadSpawning pku_reset_scope; - compile_job_->NotifyConcurrencyIncrease(); + if (!baseline_units.empty()) { + DCHECK(baseline_compile_job_->IsValid()); + baseline_compile_job_->NotifyConcurrencyIncrease(); + } + if (!top_tier_units.empty()) { + DCHECK(top_tier_compile_job_->IsValid()); + top_tier_compile_job_->NotifyConcurrencyIncrease(); + } } void CompilationStateImpl::CommitTopTierCompilationUnit( @@ -3592,7 +3411,7 @@ void CompilationStateImpl::AddTopTierPriorityCompilationUnit( // {NotifyConcurrencyIncrease} can spawn new threads which could inherit PKU // permissions (which would be a security issue). DCHECK(!CodeSpaceWriteScope::IsInScope()); - compile_job_->NotifyConcurrencyIncrease(); + top_tier_compile_job_->NotifyConcurrencyIncrease(); } std::shared_ptr<JSToWasmWrapperCompilationUnit> @@ -3626,12 +3445,18 @@ void CompilationStateImpl::FinalizeJSToWasmWrappers(Isolate* isolate, CodePageCollectionMemoryModificationScope modification_scope(isolate->heap()); for (auto& unit : js_to_wasm_wrapper_units_) { DCHECK_EQ(isolate, unit->isolate()); - Handle<CodeT> code = unit->Finalize(); + // Note: The code is either the compiled signature-specific wrapper or the + // generic wrapper built-in. + Handle<Code> code = unit->Finalize(); uint32_t index = GetExportWrapperIndex(unit->canonical_sig_index(), unit->is_import()); isolate->heap()->js_to_wasm_wrappers().Set(index, MaybeObject::FromObject(*code)); - RecordStats(*code, isolate->counters()); + if (!code->is_builtin()) { + // Do not increase code stats for non-jitted wrappers. 
+ RecordStats(*code, isolate->counters()); + isolate->counters()->wasm_compiled_export_wrapper()->Increment(1); + } } } @@ -3642,8 +3467,8 @@ CompilationUnitQueues::Queue* CompilationStateImpl::GetQueueForCompileTask( base::Optional<WasmCompilationUnit> CompilationStateImpl::GetNextCompilationUnit( - CompilationUnitQueues::Queue* queue, CompileBaselineOnly baseline_only) { - return compilation_unit_queues_.GetNextUnit(queue, baseline_only); + CompilationUnitQueues::Queue* queue, CompilationTier tier) { + return compilation_unit_queues_.GetNextUnit(queue, tier); } void CompilationStateImpl::OnFinishedUnits( @@ -3662,8 +3487,6 @@ void CompilationStateImpl::OnFinishedUnits( DCHECK_EQ(compilation_progress_.size(), native_module_->module()->num_declared_functions); - base::EnumSet<CompilationEvent> triggered_events; - for (size_t i = 0; i < code_vector.size(); i++) { WasmCode* code = code_vector[i]; DCHECK_NOT_NULL(code); @@ -3699,25 +3522,6 @@ void CompilationStateImpl::OnFinishedUnits( bytes_since_last_chunk_ += code->instructions().size(); } - if (V8_UNLIKELY(MissingRecompilationField::decode(function_progress))) { - DCHECK_LT(0, outstanding_recompilation_functions_); - // If tiering up, accept any TurboFan code. For tiering down, look at - // the {for_debugging} flag. The tier can be Liftoff or TurboFan and is - // irrelevant here. In particular, we want to ignore any outstanding - // non-debugging units. - bool matches = tiering_state_ == kTieredDown - ? code->for_debugging() - : code->tier() == ExecutionTier::kTurbofan; - if (matches) { - outstanding_recompilation_functions_--; - compilation_progress_[slot_index] = MissingRecompilationField::update( - compilation_progress_[slot_index], false); - if (outstanding_recompilation_functions_ == 0) { - triggered_events.Add(CompilationEvent::kFinishedRecompilation); - } - } - } - // Update function's compilation progress. 
if (code->tier() > reached_tier) { compilation_progress_[slot_index] = ReachedTierField::update( @@ -3727,7 +3531,7 @@ void CompilationStateImpl::OnFinishedUnits( } } - TriggerCallbacks(triggered_events); + TriggerCallbacks(); } void CompilationStateImpl::OnFinishedJSToWasmWrapperUnits(int num) { @@ -3738,10 +3542,10 @@ void CompilationStateImpl::OnFinishedJSToWasmWrapperUnits(int num) { TriggerCallbacks(); } -void CompilationStateImpl::TriggerCallbacks( - base::EnumSet<CompilationEvent> triggered_events) { +void CompilationStateImpl::TriggerCallbacks() { DCHECK(!callbacks_mutex_.TryLock()); + base::EnumSet<CompilationEvent> triggered_events; if (outstanding_export_wrappers_ == 0) { triggered_events.Add(CompilationEvent::kFinishedExportWrappers); if (outstanding_baseline_units_ == 0) { @@ -3767,11 +3571,9 @@ void CompilationStateImpl::TriggerCallbacks( // Don't trigger past events again. triggered_events -= finished_events_; - // Recompilation can happen multiple times, thus do not store this. There can - // also be multiple compilation chunks. - finished_events_ |= triggered_events - - CompilationEvent::kFinishedRecompilation - - CompilationEvent::kFinishedCompilationChunk; + // There can be multiple compilation chunks, thus do not store this. 
+ finished_events_ |= + triggered_events - CompilationEvent::kFinishedCompilationChunk; for (auto event : {std::make_pair(CompilationEvent::kFailedCompilation, @@ -3781,9 +3583,7 @@ void CompilationStateImpl::TriggerCallbacks( std::make_pair(CompilationEvent::kFinishedBaselineCompilation, "wasm.BaselineFinished"), std::make_pair(CompilationEvent::kFinishedCompilationChunk, - "wasm.CompilationChunkFinished"), - std::make_pair(CompilationEvent::kFinishedRecompilation, - "wasm.RecompilationFinished")}) { + "wasm.CompilationChunkFinished")}) { if (!triggered_events.contains(event.first)) continue; DCHECK_NE(compilation_id_, kInvalidCompilationID); TRACE_EVENT1("v8.wasm", event.second, "id", compilation_id_); @@ -3792,8 +3592,7 @@ void CompilationStateImpl::TriggerCallbacks( } } - if (outstanding_baseline_units_ == 0 && outstanding_export_wrappers_ == 0 && - outstanding_recompilation_functions_ == 0) { + if (outstanding_baseline_units_ == 0 && outstanding_export_wrappers_ == 0) { auto new_end = std::remove_if( callbacks_.begin(), callbacks_.end(), [](const auto& callback) { return callback->release_after_final_event(); @@ -3833,7 +3632,7 @@ void CompilationStateImpl::PublishCompilationResults( native_module_->module() ->isorecursive_canonical_type_ids[function.sig_index]; WasmImportWrapperCache::CacheKey key( - compiler::kDefaultImportCallKind, canonical_type_index, + kDefaultImportCallKind, canonical_type_index, static_cast<int>(function.sig->parameter_count()), kNoSuspend); // If two imported functions have the same key, only one of them should // have been added as a compilation unit. 
So it is always the first time @@ -3889,11 +3688,13 @@ void CompilationStateImpl::SchedulePublishCompilationResults( } } -size_t CompilationStateImpl::NumOutstandingCompilations() const { - size_t outstanding_wrappers = - outstanding_js_to_wasm_wrappers_.load(std::memory_order_relaxed); - size_t outstanding_functions = compilation_unit_queues_.GetTotalSize(); - return outstanding_wrappers + outstanding_functions; +size_t CompilationStateImpl::NumOutstandingExportWrappers() const { + return outstanding_js_to_wasm_wrappers_.load(std::memory_order_relaxed); +} + +size_t CompilationStateImpl::NumOutstandingCompilations( + CompilationTier tier) const { + return compilation_unit_queues_.GetSizeForTier(tier); } void CompilationStateImpl::SetError() { @@ -3909,61 +3710,62 @@ void CompilationStateImpl::SetError() { void CompilationStateImpl::WaitForCompilationEvent( CompilationEvent expect_event) { - class WaitForCompilationEventCallback : public CompilationEventCallback { - public: - WaitForCompilationEventCallback(std::shared_ptr<base::Semaphore> semaphore, - std::shared_ptr<std::atomic<bool>> done, - base::EnumSet<CompilationEvent> events) - : semaphore_(std::move(semaphore)), - done_(std::move(done)), - events_(events) {} - - void call(CompilationEvent event) override { - if (!events_.contains(event)) return; - done_->store(true, std::memory_order_relaxed); - semaphore_->Signal(); - } - - private: - std::shared_ptr<base::Semaphore> semaphore_; - std::shared_ptr<std::atomic<bool>> done_; - base::EnumSet<CompilationEvent> events_; - }; - - auto semaphore = std::make_shared<base::Semaphore>(0); - auto done = std::make_shared<std::atomic<bool>>(false); + switch (expect_event) { + case CompilationEvent::kFinishedExportWrappers: + break; + case CompilationEvent::kFinishedBaselineCompilation: + if (baseline_compile_job_->IsValid()) baseline_compile_job_->Join(); + break; + default: + // Waiting on other CompilationEvent doesn't make sense. 
+ UNREACHABLE(); + } + if (js_to_wasm_wrapper_job_->IsValid()) js_to_wasm_wrapper_job_->Join(); +#ifdef DEBUG base::EnumSet<CompilationEvent> events{expect_event, CompilationEvent::kFailedCompilation}; - { - base::MutexGuard callbacks_guard(&callbacks_mutex_); - if (finished_events_.contains_any(events)) return; - callbacks_.emplace_back(std::make_unique<WaitForCompilationEventCallback>( - semaphore, done, events)); - } - - class WaitForEventDelegate final : public JobDelegate { - public: - explicit WaitForEventDelegate(std::shared_ptr<std::atomic<bool>> done) - : done_(std::move(done)) {} + base::MutexGuard guard(&callbacks_mutex_); + DCHECK(finished_events_.contains_any(events)); +#endif +} - bool ShouldYield() override { - return done_->load(std::memory_order_relaxed); +void CompilationStateImpl::TierUpAllFunctions() { + const WasmModule* module = native_module_->module(); + uint32_t num_wasm_functions = module->num_declared_functions; + WasmCodeRefScope code_ref_scope; + CompilationUnitBuilder builder(native_module_); + for (uint32_t i = 0; i < num_wasm_functions; ++i) { + int func_index = module->num_imported_functions + i; + WasmCode* code = native_module_->GetCode(func_index); + if (!code || !code->is_turbofan()) { + builder.AddTopTierUnit(func_index, ExecutionTier::kTurbofan); } + } + builder.Commit(); + // Join the compilation, until no compilation units are left anymore. 
+ class DummyDelegate final : public JobDelegate { + bool ShouldYield() override { return false; } bool IsJoiningThread() const override { return true; } - void NotifyConcurrencyIncrease() override { UNIMPLEMENTED(); } - uint8_t GetTaskId() override { return kMainTaskId; } - - private: - std::shared_ptr<std::atomic<bool>> done_; }; - WaitForEventDelegate delegate{done}; + DummyDelegate delegate; ExecuteCompilationUnits(native_module_weak_, async_counters_.get(), &delegate, - kBaselineOnly); - semaphore->Wait(); + CompilationTier::kTopTier); + + // We cannot wait for other compilation threads to finish, so we explicitly + // compile all functions which are not yet available as TurboFan code. + for (uint32_t i = 0; i < num_wasm_functions; ++i) { + uint32_t func_index = module->num_imported_functions + i; + WasmCode* code = native_module_->GetCode(func_index); + if (!code || !code->is_turbofan()) { + wasm::GetWasmEngine()->CompileFunction(async_counters_.get(), + native_module_, func_index, + wasm::ExecutionTier::kTurbofan); + } + } } namespace { @@ -4028,7 +3830,7 @@ void CompileJsToWasmWrappers(Isolate* isolate, const WasmModule* module) { module->isorecursive_canonical_type_ids[function.sig_index]; int wrapper_index = GetExportWrapperIndex(canonical_type_index, function.imported); - auto existing_wrapper = + MaybeObject existing_wrapper = isolate->heap()->js_to_wasm_wrappers().Get(wrapper_index); if (existing_wrapper.IsStrongOrWeak() && !existing_wrapper.GetHeapObject().IsUndefined()) { @@ -4073,19 +3875,22 @@ void CompileJsToWasmWrappers(Isolate* isolate, const WasmModule* module) { JSToWasmWrapperKey key = pair.first; JSToWasmWrapperCompilationUnit* unit = pair.second.get(); DCHECK_EQ(isolate, unit->isolate()); - Handle<CodeT> code = unit->Finalize(); + Handle<Code> code = unit->Finalize(); int wrapper_index = GetExportWrapperIndex(key.second, key.first); isolate->heap()->js_to_wasm_wrappers().Set( wrapper_index, HeapObjectReference::Strong(*code)); - 
RecordStats(*code, isolate->counters()); + if (!code->is_builtin()) { + // Do not increase code stats for non-jitted wrappers. + RecordStats(*code, isolate->counters()); + isolate->counters()->wasm_compiled_export_wrapper()->Increment(1); + } } } WasmCode* CompileImportWrapper( - NativeModule* native_module, Counters* counters, - compiler::WasmImportCallKind kind, const FunctionSig* sig, - uint32_t canonical_type_index, int expected_arity, Suspend suspend, - WasmImportWrapperCache::ModificationScope* cache_scope) { + NativeModule* native_module, Counters* counters, ImportCallKind kind, + const FunctionSig* sig, uint32_t canonical_type_index, int expected_arity, + Suspend suspend, WasmImportWrapperCache::ModificationScope* cache_scope) { // Entry should exist, so that we don't insert a new one and invalidate // other threads' iterators/references, but it should not have been compiled // yet. @@ -4106,7 +3911,7 @@ WasmCode* CompileImportWrapper( result.tagged_parameter_slots, result.protected_instructions_data.as_vector(), result.source_positions.as_vector(), GetCodeKind(result), - ExecutionTier::kNone, kNoDebugging); + ExecutionTier::kNone, kNotForDebugging); published_code = native_module->PublishCode(std::move(wasm_code)); } (*cache_scope)[key] = published_code; |