Make the rustc respect the `-C codegen-units` flag in incremental mode.

Before this commit `-C codegen-units` would just get silently be ignored if `-C incremental` was specified too. After this commit one can control the number of codegen units generated during incremental compilation. The default is rather high at 256, so most crates won't see a difference unless explicitly opting into a lower count.
author: Michael Woerister <michaelwoerister@posteo> 2020-03-19 17:14:02 +0100
committer: Michael Woerister <michaelwoerister@posteo> 2020-03-31 16:11:16 +0200
commit: 1e5b4594e1aad47fccd855fa54dd40a84d07dbdf (patch)
tree: e1bfa779e0645444d337d020493b54cafb9e6e64
parent: 2113659479a82ea69633b23ef710b58ab127755e (diff)
download: rust-1e5b4594e1aad47fccd855fa54dd40a84d07dbdf.tar.gz
2 files changed, 67 insertions, 24 deletions
diff --git a/src/librustc_mir/monomorphize/partitioning.rs b/src/librustc_mir/monomorphize/partitioning.rs
index ba01e370aae..9068c0541a4 100644
--- a/src/librustc_mir/monomorphize/partitioning.rs
+++ b/src/librustc_mir/monomorphize/partitioning.rs
@@ -107,19 +107,11 @@ use rustc_middle::mir::mono::{InstantiationMode, MonoItem};
 use rustc_middle::ty::print::characteristic_def_id_of_type;
 use rustc_middle::ty::query::Providers;
 use rustc_middle::ty::{self, DefIdTree, InstanceDef, TyCtxt};
-use rustc_span::symbol::Symbol;
+use rustc_span::symbol::{Symbol, SymbolStr};
 
 use crate::monomorphize::collector::InliningMap;
 use crate::monomorphize::collector::{self, MonoItemCollectionMode};
 
-pub enum PartitioningStrategy {
-    /// Generates one codegen unit per source-level module.
-    PerModule,
-
-    /// Partition the whole crate into a fixed number of codegen units.
-    FixedUnitCount(usize),
-}
-
 // Anything we can't find a proper codegen unit for goes into this.
 fn fallback_cgu_name(name_builder: &mut CodegenUnitNameBuilder<'_>) -> Symbol {
     name_builder.build_cgu_name(LOCAL_CRATE, &["fallback"], Some("cgu"))
@@ -128,7 +120,7 @@ fn fallback_cgu_name(name_builder: &mut CodegenUnitNameBuilder<'_>) -> Symbol {
 pub fn partition<'tcx, I>(
     tcx: TyCtxt<'tcx>,
     mono_items: I,
-    strategy: PartitioningStrategy,
+    max_cgu_count: usize,
     inlining_map: &InliningMap<'tcx>,
 ) -> Vec<CodegenUnit<'tcx>>
 where
@@ -148,11 +140,10 @@ where
 
     debug_dump(tcx, "INITIAL PARTITIONING:", initial_partitioning.codegen_units.iter());
 
-    // If the partitioning should produce a fixed count of codegen units, merge
-    // until that count is reached.
-    if let PartitioningStrategy::FixedUnitCount(count) = strategy {
+    // Merge until we have at most `max_cgu_count` codegen units.
+    {
         let _prof_timer = tcx.prof.generic_activity("cgu_partitioning_merge_cgus");
-        merge_codegen_units(tcx, &mut initial_partitioning, count);
+        merge_codegen_units(tcx, &mut initial_partitioning, max_cgu_count);
         debug_dump(tcx, "POST MERGING:", initial_partitioning.codegen_units.iter());
     }
 
@@ -480,6 +471,10 @@ fn merge_codegen_units<'tcx>(
     // the stable sort below will keep everything nice and deterministic.
     codegen_units.sort_by_cached_key(|cgu| cgu.name().as_str());
 
+    // This map keeps track of what got merged into what.
+    let mut cgu_contents: FxHashMap<Symbol, Vec<SymbolStr>> =
+        codegen_units.iter().map(|cgu| (cgu.name(), vec![cgu.name().as_str()])).collect();
+
     // Merge the two smallest codegen units until the target size is reached.
     while codegen_units.len() > target_cgu_count {
         // Sort small cgus to the back
@@ -487,20 +482,67 @@ fn merge_codegen_units<'tcx>(
         let mut smallest = codegen_units.pop().unwrap();
         let second_smallest = codegen_units.last_mut().unwrap();
 
+        // Move the mono-items from `smallest` to `second_smallest`
         second_smallest.modify_size_estimate(smallest.size_estimate());
         for (k, v) in smallest.items_mut().drain() {
             second_smallest.items_mut().insert(k, v);
         }
+
+        // Record that `second_smallest` now contains all the stuff that was in
+        // `smallest` before.
+        let mut consumed_cgu_names = cgu_contents.remove(&smallest.name()).unwrap();
+        cgu_contents.get_mut(&second_smallest.name()).unwrap().extend(consumed_cgu_names.drain(..));
+
         debug!(
-            "CodegenUnit {} merged in to CodegenUnit {}",
+            "CodegenUnit {} merged into CodegenUnit {}",
             smallest.name(),
             second_smallest.name()
         );
     }
 
     let cgu_name_builder = &mut CodegenUnitNameBuilder::new(tcx);
-    for (index, cgu) in codegen_units.iter_mut().enumerate() {
-        cgu.set_name(numbered_codegen_unit_name(cgu_name_builder, index));
+
+    if tcx.sess.opts.incremental.is_some() {
+        // If we are doing incremental compilation, we want CGU names to
+        // reflect the path of the source level module they correspond to.
+        // For CGUs that contain the code of multiple modules because of the
+        // merging done above, we use a concatenation of the names of
+        // all contained CGUs.
+        let new_cgu_names: FxHashMap<Symbol, String> = cgu_contents
+            .into_iter()
+            // This `filter` makes sure we only update the name of CGUs that
+            // were actually modified by merging.
+            .filter(|(_, cgu_contents)| cgu_contents.len() > 1)
+            .map(|(current_cgu_name, cgu_contents)| {
+                let mut cgu_contents: Vec<&str> = cgu_contents.iter().map(|s| &s[..]).collect();
+
+                // Sort the names, so things are deterministic and easy to
+                // predict.
+                cgu_contents.sort();
+
+                (current_cgu_name, cgu_contents.join("--"))
+            })
+            .collect();
+
+        for cgu in codegen_units.iter_mut() {
+            if let Some(new_cgu_name) = new_cgu_names.get(&cgu.name()) {
+                if tcx.sess.opts.debugging_opts.human_readable_cgu_names {
+                    cgu.set_name(Symbol::intern(&new_cgu_name));
+                } else {
+                    // If we don't require CGU names to be human-readable, we
+                    // use a fixed length hash of the composite CGU name
+                    // instead.
+                    let new_cgu_name = CodegenUnit::mangle_name(&new_cgu_name);
+                    cgu.set_name(Symbol::intern(&new_cgu_name));
+                }
+            }
+        }
+    } else {
+        // If we are compiling non-incrementally we just generate simple CGU
+        // names containing an index.
+        for (index, cgu) in codegen_units.iter_mut().enumerate() {
+            cgu.set_name(numbered_codegen_unit_name(cgu_name_builder, index));
+        }
     }
 }
 
@@ -879,13 +921,7 @@ fn collect_and_partition_mono_items(
     let (codegen_units, _) = tcx.sess.time("partition_and_assert_distinct_symbols", || {
         sync::join(
             || {
-                let strategy = if tcx.sess.opts.incremental.is_some() {
-                    PartitioningStrategy::PerModule
-                } else {
-                    PartitioningStrategy::FixedUnitCount(tcx.sess.codegen_units())
-                };
-
-                partition(tcx, items.iter().cloned(), strategy, &inlining_map)
+                partition(tcx, items.iter().cloned(), tcx.sess.codegen_units(), &inlining_map)
                     .into_iter()
                     .map(Arc::new)
                     .collect::<Vec<_>>()
diff --git a/src/librustc_session/session.rs b/src/librustc_session/session.rs
index b3d75143c56..7f8b55d9d76 100644
--- a/src/librustc_session/session.rs
+++ b/src/librustc_session/session.rs
@@ -758,6 +758,13 @@ impl Session {
             return n as usize;
         }
 
+        // If incremental compilation is turned on, we default to a high number
+        // codegen units in order to reduce the "collateral damage" small
+        // changes cause.
+        if self.opts.incremental.is_some() {
+            return 256;
+        }
+
         // Why is 16 codegen units the default all the time?
         //
         // The main reason for enabling multiple codegen units by default is to
author	Michael Woerister <michaelwoerister@posteo>	2020-03-19 17:14:02 +0100
committer	Michael Woerister <michaelwoerister@posteo>	2020-03-31 16:11:16 +0200
commit	1e5b4594e1aad47fccd855fa54dd40a84d07dbdf (patch)
tree	e1bfa779e0645444d337d020493b54cafb9e6e64
parent	2113659479a82ea69633b23ef710b58ab127755e (diff)
download	rust-1e5b4594e1aad47fccd855fa54dd40a84d07dbdf.tar.gz