Diffstat (limited to 'lib/Analysis')
-rw-r--r--   lib/Analysis/BasicAliasAnalysis.cpp     |   6
-rw-r--r--   lib/Analysis/BlockFrequencyInfo.cpp     |   5
-rw-r--r--   lib/Analysis/BlockFrequencyInfoImpl.cpp |  21
-rw-r--r--   lib/Analysis/LoopAccessAnalysis.cpp     |  45
-rw-r--r--   lib/Analysis/ModuleSummaryAnalysis.cpp  |   9
-rw-r--r--   lib/Analysis/TypeBasedAliasAnalysis.cpp | 178
-rw-r--r--   lib/Analysis/ValueTracking.cpp          |  13
7 files changed, 177 insertions, 100 deletions
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 4a6abae8e971..fb9ece2bd206 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1672,9 +1672,9 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size,
   // If both pointers are pointing into the same object and one of them
   // accesses the entire object, then the accesses must overlap in some way.
   if (O1 == O2)
-    if ((V1Size != MemoryLocation::UnknownSize &&
-         isObjectSize(O1, V1Size, DL, TLI)) ||
-        (V2Size != MemoryLocation::UnknownSize &&
+    if (V1Size != MemoryLocation::UnknownSize &&
+        V2Size != MemoryLocation::UnknownSize &&
+        (isObjectSize(O1, V1Size, DL, TLI) ||
          isObjectSize(O2, V2Size, DL, TLI)))
       return AliasCache[Locs] = PartialAlias;

diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp
index 5d2170dcf155..41c295895213 100644
--- a/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/lib/Analysis/BlockFrequencyInfo.cpp
@@ -218,6 +218,11 @@ BlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const {
   return BFI->getProfileCountFromFreq(*getFunction(), Freq);
 }

+bool BlockFrequencyInfo::isIrrLoopHeader(const BasicBlock *BB) {
+  assert(BFI && "Expected analysis to be available");
+  return BFI->isIrrLoopHeader(BB);
+}
+
 void BlockFrequencyInfo::setBlockFreq(const BasicBlock *BB, uint64_t Freq) {
   assert(BFI && "Expected analysis to be available");
   BFI->setBlockFreq(BB, Freq);
diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 1030407b766d..7e323022d9ce 100644
--- a/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -271,6 +271,7 @@ void BlockFrequencyInfoImplBase::clear() {
   // Swap with a default-constructed std::vector, since std::vector<>::clear()
   // does not actually clear heap storage.
   std::vector<FrequencyData>().swap(Freqs);
+  IsIrrLoopHeader.clear();
   std::vector<WorkingData>().swap(Working);
   Loops.clear();
 }
@@ -280,8 +281,10 @@ void BlockFrequencyInfoImplBase::clear() {
 /// Releases all memory not used downstream. In particular, saves Freqs.
 static void cleanup(BlockFrequencyInfoImplBase &BFI) {
   std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
+  SparseBitVector<> SavedIsIrrLoopHeader(std::move(BFI.IsIrrLoopHeader));
   BFI.clear();
   BFI.Freqs = std::move(SavedFreqs);
+  BFI.IsIrrLoopHeader = std::move(SavedIsIrrLoopHeader);
 }

 bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
@@ -572,6 +575,13 @@ BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F,
   return BlockCount.getLimitedValue();
 }

+bool
+BlockFrequencyInfoImplBase::isIrrLoopHeader(const BlockNode &Node) {
+  if (!Node.isValid())
+    return false;
+  return IsIrrLoopHeader.test(Node.Index);
+}
+
 Scaled64
 BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
   if (!Node.isValid())
@@ -819,3 +829,14 @@ void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) {
     DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
   }
 }
+
+void BlockFrequencyInfoImplBase::distributeIrrLoopHeaderMass(Distribution &Dist) {
+  BlockMass LoopMass = BlockMass::getFull();
+  DitheringDistributer D(Dist, LoopMass);
+  for (const Weight &W : Dist.Weights) {
+    BlockMass Taken = D.takeMass(W.Amount);
+    assert(W.Type == Weight::Local && "all weights should be local");
+    Working[W.TargetNode.Index].getMass() = Taken;
+    DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
+  }
+}
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp
index 19889658b13c..e141d6c58b65 100644
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2136,8 +2136,51 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
   if (!Stride)
     return;

-  DEBUG(dbgs() << "LAA: Found a strided access that we can version");
+  DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
+                  "versioning:");
   DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n");
+
+  // Avoid adding the "Stride == 1" predicate when we know that
+  // Stride >= Trip-Count. Such a predicate will effectively optimize a single
+  // or zero iteration loop, as Trip-Count <= Stride == 1.
+  //
+  // TODO: We are currently not making a very informed decision on when it is
+  // beneficial to apply stride versioning. It might make more sense that the
+  // users of this analysis (such as the vectorizer) will trigger it, based on
+  // their specific cost considerations; For example, in cases where stride
+  // versioning does not help resolving memory accesses/dependences, the
+  // vectorizer should evaluate the cost of the runtime test, and the benefit
+  // of various possible stride specializations, considering the alternatives
+  // of using gather/scatters (if available).
+
+  const SCEV *StrideExpr = PSE->getSCEV(Stride);
+  const SCEV *BETakenCount = PSE->getBackedgeTakenCount();
+
+  // Match the types so we can compare the stride and the BETakenCount.
+  // The Stride can be positive/negative, so we sign extend Stride;
+  // The backdgeTakenCount is non-negative, so we zero extend BETakenCount.
+  const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
+  uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType());
+  uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType());
+  const SCEV *CastedStride = StrideExpr;
+  const SCEV *CastedBECount = BETakenCount;
+  ScalarEvolution *SE = PSE->getSE();
+  if (BETypeSize >= StrideTypeSize)
+    CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType());
+  else
+    CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType());
+  const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
+  // Since TripCount == BackEdgeTakenCount + 1, checking:
+  // "Stride >= TripCount" is equivalent to checking:
+  // Stride - BETakenCount > 0
+  if (SE->isKnownPositive(StrideMinusBETaken)) {
+    DEBUG(dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
+                    "Stride==1 predicate will imply that the loop executes "
+                    "at most once.\n");
+    return;
+  }
+  DEBUG(dbgs() << "LAA: Found a strided access that we can version.");
+
   SymbolicStrides[Ptr] = Stride;
   StrideSet.insert(Stride);
 }
diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp
index afd575e7273c..82db09ca97b0 100644
--- a/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -303,7 +303,7 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
       // FIXME: refactor this to use the same code that inliner is using.
       F.isVarArg();
   GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport,
-                                    /* Live = */ false);
+                                    /* Live = */ false, F.isDSOLocal());
   FunctionSummary::FFlags FunFlags{
       F.hasFnAttribute(Attribute::ReadNone),
       F.hasFnAttribute(Attribute::ReadOnly),
@@ -329,7 +329,7 @@ computeVariableSummary(ModuleSummaryIndex &Index, const GlobalVariable &V,
   findRefEdges(Index, &V, RefEdges, Visited);
   bool NonRenamableLocal = isNonRenamableLocal(V);
   GlobalValueSummary::GVFlags Flags(V.getLinkage(), NonRenamableLocal,
-                                    /* Live = */ false);
+                                    /* Live = */ false, V.isDSOLocal());
   auto GVarSummary =
       llvm::make_unique<GlobalVarSummary>(Flags, RefEdges.takeVector());
   if (NonRenamableLocal)
@@ -342,7 +342,7 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
                     DenseSet<GlobalValue::GUID> &CantBePromoted) {
   bool NonRenamableLocal = isNonRenamableLocal(A);
   GlobalValueSummary::GVFlags Flags(A.getLinkage(), NonRenamableLocal,
-                                    /* Live = */ false);
+                                    /* Live = */ false, A.isDSOLocal());
   auto AS = llvm::make_unique<AliasSummary>(Flags);
   auto *Aliasee = A.getBaseObject();
   auto *AliaseeSummary = Index.getGlobalValueSummary(*Aliasee);
@@ -410,7 +410,8 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
          assert(GV->isDeclaration() && "Def in module asm already has definition");
          GlobalValueSummary::GVFlags GVFlags(GlobalValue::InternalLinkage,
                                              /* NotEligibleToImport = */ true,
-                                             /* Live = */ true);
+                                             /* Live = */ true,
+                                             /* Local */ GV->isDSOLocal());
          CantBePromoted.insert(GlobalValue::getGUID(Name));
          // Create the appropriate summary type.
          if (Function *F = dyn_cast<Function>(GV)) {
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 3a3a7ad39554..1e36e314b864 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -314,17 +314,8 @@ AliasResult TypeBasedAAResult::alias(const MemoryLocation &LocA,
   if (!EnableTBAA)
     return AAResultBase::alias(LocA, LocB);

-  // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must
-  // be conservative.
-  const MDNode *AM = LocA.AATags.TBAA;
-  if (!AM)
-    return AAResultBase::alias(LocA, LocB);
-  const MDNode *BM = LocB.AATags.TBAA;
-  if (!BM)
-    return AAResultBase::alias(LocA, LocB);
-
-  // If they may alias, chain to the next AliasAnalysis.
-  if (Aliases(AM, BM))
+  // If accesses may alias, chain to the next AliasAnalysis.
+  if (Aliases(LocA.AATags.TBAA, LocB.AATags.TBAA))
     return AAResultBase::alias(LocA, LocB);

   // Otherwise return a definitive result.
@@ -424,25 +415,24 @@ bool MDNode::isTBAAVtableAccess() const {
   return false;
 }

+static bool matchAccessTags(const MDNode *A, const MDNode *B,
+                            const MDNode **GenericTag = nullptr);
+
 MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
+  const MDNode *GenericTag;
+  matchAccessTags(A, B, &GenericTag);
+  return const_cast<MDNode*>(GenericTag);
+}
+
+static const MDNode *getLeastCommonType(const MDNode *A, const MDNode *B) {
   if (!A || !B)
     return nullptr;

   if (A == B)
     return A;

-  // For struct-path aware TBAA, we use the access type of the tag.
-  assert(isStructPathTBAA(A) && isStructPathTBAA(B) &&
-         "Auto upgrade should have taken care of this!");
-  A = cast_or_null<MDNode>(MutableTBAAStructTagNode(A).getAccessType());
-  if (!A)
-    return nullptr;
-  B = cast_or_null<MDNode>(MutableTBAAStructTagNode(B).getAccessType());
-  if (!B)
-    return nullptr;
-
-  SmallSetVector<MDNode *, 4> PathA;
-  MutableTBAANode TA(A);
+  SmallSetVector<const MDNode *, 4> PathA;
+  TBAANode TA(A);
   while (TA.getNode()) {
     if (PathA.count(TA.getNode()))
       report_fatal_error("Cycle found in TBAA metadata.");
@@ -450,8 +440,8 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
     TA = TA.getParent();
   }

-  SmallSetVector<MDNode *, 4> PathB;
-  MutableTBAANode TB(B);
+  SmallSetVector<const MDNode *, 4> PathB;
+  TBAANode TB(B);
   while (TB.getNode()) {
     if (PathB.count(TB.getNode()))
       report_fatal_error("Cycle found in TBAA metadata.");
@@ -462,7 +452,7 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
   int IA = PathA.size() - 1;
   int IB = PathB.size() - 1;

-  MDNode *Ret = nullptr;
+  const MDNode *Ret = nullptr;
   while (IA >= 0 && IB >= 0) {
     if (PathA[IA] == PathB[IB])
       Ret = PathA[IA];
@@ -472,17 +462,7 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
     --IB;
   }

-  // We either did not find a match, or the only common base "type" is
-  // the root node. In either case, we don't have any useful TBAA
-  // metadata to attach.
-  if (!Ret || Ret->getNumOperands() < 2)
-    return nullptr;
-
-  // We need to convert from a type node to a tag node.
-  Type *Int64 = IntegerType::get(A->getContext(), 64);
-  Metadata *Ops[3] = {Ret, Ret,
-                      ConstantAsMetadata::get(ConstantInt::get(Int64, 0))};
-  return MDNode::get(A->getContext(), Ops);
+  return Ret;
 }

 void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const {
@@ -505,70 +485,96 @@ void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const {
     N.NoAlias = getMetadata(LLVMContext::MD_noalias);
 }

-/// Aliases - Test whether the type represented by A may alias the
-/// type represented by B.
-bool TypeBasedAAResult::Aliases(const MDNode *A, const MDNode *B) const {
-  // Verify that both input nodes are struct-path aware. Auto-upgrade should
-  // have taken care of this.
-  assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware.");
-  assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware.");
+static bool findAccessType(TBAAStructTagNode BaseTag,
+                           const MDNode *AccessTypeNode,
+                           uint64_t &OffsetInBase) {
+  // Start from the base type, follow the edge with the correct offset in
+  // the type DAG and adjust the offset until we reach the access type or
+  // until we reach a root node.
+  TBAAStructTypeNode BaseType(BaseTag.getBaseType());
+  OffsetInBase = BaseTag.getOffset();
+
+  while (const MDNode *BaseTypeNode = BaseType.getNode()) {
+    if (BaseTypeNode == AccessTypeNode)
+      return true;

-  // Keep track of the root node for A and B.
-  TBAAStructTypeNode RootA, RootB;
-  TBAAStructTagNode TagA(A), TagB(B);
+    // Follow the edge with the correct offset, Offset will be adjusted to
+    // be relative to the field type.
+    BaseType = BaseType.getParent(OffsetInBase);
+  }
+  return false;
+}

-  // TODO: We need to check if AccessType of TagA encloses AccessType of
-  // TagB to support aggregate AccessType. If yes, return true.
+static const MDNode *createAccessTag(const MDNode *AccessType) {
+  // If there is no access type or the access type is the root node, then
+  // we don't have any useful access tag to return.
+  if (!AccessType || AccessType->getNumOperands() < 2)
+    return nullptr;

-  // Start from the base type of A, follow the edge with the correct offset in
-  // the type DAG and adjust the offset until we reach the base type of B or
-  // until we reach the Root node.
-  // Compare the adjusted offset once we have the same base.
+  Type *Int64 = IntegerType::get(AccessType->getContext(), 64);
+  auto *ImmutabilityFlag = ConstantAsMetadata::get(ConstantInt::get(Int64, 0));
+  Metadata *Ops[] = {const_cast<MDNode*>(AccessType),
+                     const_cast<MDNode*>(AccessType), ImmutabilityFlag};
+  return MDNode::get(AccessType->getContext(), Ops);
+}

-  // Climb the type DAG from base type of A to see if we reach base type of B.
-  const MDNode *BaseA = TagA.getBaseType();
-  const MDNode *BaseB = TagB.getBaseType();
-  uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset();
-  for (TBAAStructTypeNode T(BaseA);;) {
-    if (T.getNode() == BaseB)
-      // Base type of A encloses base type of B, check if the offsets match.
-      return OffsetA == OffsetB;
-
-    RootA = T;
-    // Follow the edge with the correct offset, OffsetA will be adjusted to
-    // be relative to the field type.
-    T = T.getParent(OffsetA);
-    if (!T.getNode())
-      break;
+/// matchTags - Return true if the given couple of accesses are allowed to
+/// overlap. If \arg GenericTag is not null, then on return it points to the
+/// most generic access descriptor for the given two.
+static bool matchAccessTags(const MDNode *A, const MDNode *B,
+                            const MDNode **GenericTag) {
+  if (A == B) {
+    if (GenericTag)
+      *GenericTag = A;
+    return true;
   }

-  // Reset OffsetA and climb the type DAG from base type of B to see if we reach
-  // base type of A.
-  OffsetA = TagA.getOffset();
-  for (TBAAStructTypeNode T(BaseB);;) {
-    if (T.getNode() == BaseA)
-      // Base type of B encloses base type of A, check if the offsets match.
-      return OffsetA == OffsetB;
-
-    RootB = T;
-    // Follow the edge with the correct offset, OffsetB will be adjusted to
-    // be relative to the field type.
-    T = T.getParent(OffsetB);
-    if (!T.getNode())
-      break;
+  // Accesses with no TBAA information may alias with any other accesses.
+  if (!A || !B) {
+    if (GenericTag)
+      *GenericTag = nullptr;
+    return true;
   }

-  // Neither node is an ancestor of the other.
+  // Verify that both input nodes are struct-path aware. Auto-upgrade should
+  // have taken care of this.
+  assert(isStructPathTBAA(A) && "Access A is not struct-path aware!");
+  assert(isStructPathTBAA(B) && "Access B is not struct-path aware!");
+
+  TBAAStructTagNode TagA(A), TagB(B);
+  const MDNode *CommonType = getLeastCommonType(TagA.getAccessType(),
+                                                TagB.getAccessType());
+  if (GenericTag)
+    *GenericTag = createAccessTag(CommonType);

-  // If they have different roots, they're part of different potentially
-  // unrelated type systems, so we must be conservative.
-  if (RootA.getNode() != RootB.getNode())
+  // TODO: We need to check if AccessType of TagA encloses AccessType of
+  // TagB to support aggregate AccessType. If yes, return true.
+
+  // Climb the type DAG from base type of A to see if we reach base type of B.
+  uint64_t OffsetA;
+  if (findAccessType(TagA, TagB.getBaseType(), OffsetA))
+    return OffsetA == TagB.getOffset();
+
+  // Climb the type DAG from base type of B to see if we reach base type of A.
+  uint64_t OffsetB;
+  if (findAccessType(TagB, TagA.getBaseType(), OffsetB))
+    return OffsetB == TagA.getOffset();
+
+  // If the final access types have different roots, they're part of different
+  // potentially unrelated type systems, so we must be conservative.
+  if (!CommonType)
     return true;

   // If they have the same root, then we've proved there's no alias.
   return false;
 }

+/// Aliases - Test whether the access represented by tag A may alias the
+/// access represented by tag B.
+bool TypeBasedAAResult::Aliases(const MDNode *A, const MDNode *B) const {
+  return matchAccessTags(A, B);
+}
+
 AnalysisKey TypeBasedAA::Key;

 TypeBasedAAResult TypeBasedAA::run(Function &F, FunctionAnalysisManager &AM) {
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 3f53fc517e39..2010858139a6 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -2579,9 +2579,7 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(ImmutableCallSite ICS,
   case LibFunc_sqrt:
   case LibFunc_sqrtf:
   case LibFunc_sqrtl:
-    if (ICS->hasNoNaNs())
-      return Intrinsic::sqrt;
-    return Intrinsic::not_intrinsic;
+    return Intrinsic::sqrt;
   }

   return Intrinsic::not_intrinsic;
@@ -4140,7 +4138,8 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
     // Is the sign bit set?
     // (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX
     // (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN
-    if (Pred == CmpInst::ICMP_SLT && *C1 == 0 && C2->isMaxSignedValue())
+    if (Pred == CmpInst::ICMP_SLT && C1->isNullValue() &&
+        C2->isMaxSignedValue())
       return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};

     // Is the sign bit clear?
@@ -4272,13 +4271,15 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
       // ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X
       // NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X
-      if (Pred == ICmpInst::ICMP_SGT && (*C1 == 0 || C1->isAllOnesValue())) {
+      if (Pred == ICmpInst::ICMP_SGT &&
+          (C1->isNullValue() || C1->isAllOnesValue())) {
        return {(CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
       }

       // ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X
       // NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X
-      if (Pred == ICmpInst::ICMP_SLT && (*C1 == 0 || *C1 == 1)) {
+      if (Pred == ICmpInst::ICMP_SLT &&
+          (C1->isNullValue() || C1->isOneValue())) {
        return {(CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
       }
     }
   }
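As a usage illustration (not part of this commit), a legacy-pass-manager function pass could consume the new BlockFrequencyInfo::isIrrLoopHeader() query roughly as sketched below; the pass name and registration string are invented for the example.

#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

namespace {
// Hypothetical pass that reports irreducible loop headers via the
// BlockFrequencyInfo::isIrrLoopHeader() query added in this change.
struct IrrLoopHeaderPrinter : public FunctionPass {
  static char ID;
  IrrLoopHeaderPrinter() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<BlockFrequencyInfoWrapperPass>();
    AU.setPreservesAll();
  }

  bool runOnFunction(Function &F) override {
    BlockFrequencyInfo &BFI =
        getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
    for (BasicBlock &BB : F)
      if (BFI.isIrrLoopHeader(&BB))
        errs() << F.getName() << ": irreducible loop header "
               << BB.getName() << "\n";
    return false; // Analysis only; the IR is not modified.
  }
};
} // end anonymous namespace

char IrrLoopHeaderPrinter::ID = 0;
static RegisterPass<IrrLoopHeaderPrinter>
    X("print-irr-loop-headers", "Print irreducible loop headers", false, false);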
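And a small standalone sketch (also not part of the commit) of what the reworked MDNode::getMostGenericTBAA() now computes: the merged tag is built from the least common access type of the two input tags, so two scalar tags that only share a parent type collapse into a generic tag on that parent. The TBAA node names below mirror what Clang usually emits but are purely illustrative.

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  MDBuilder MDB(Ctx);

  // A tiny struct-path TBAA hierarchy: root <- char <- {int, float}.
  MDNode *Root = MDB.createTBAARoot("Simple C/C++ TBAA");
  MDNode *Char = MDB.createTBAAScalarTypeNode("omnipotent char", Root);
  MDNode *Int = MDB.createTBAAScalarTypeNode("int", Char);
  MDNode *Float = MDB.createTBAAScalarTypeNode("float", Char);

  // Scalar access tags of the form !{BaseTy, AccessTy, Offset}.
  MDNode *IntTag = MDB.createTBAAStructTagNode(Int, Int, 0);
  MDNode *FloatTag = MDB.createTBAAStructTagNode(Float, Float, 0);

  // The least common access type of "int" and "float" is "omnipotent char",
  // so the merged tag describes a char-typed access; if the only common
  // ancestor were the root, the result would be null.
  if (MDNode *Generic = MDNode::getMostGenericTBAA(IntTag, FloatTag)) {
    Generic->print(errs());
    errs() << "\n";
  }
  return 0;
}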