summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthias Springer <springerm@google.com>2023-04-28 10:34:03 +0900
committerMatthias Springer <springerm@google.com>2023-04-28 11:46:32 +0900
commit77124386feb615343afef2740396de1baceb1336 (patch)
tree3866cb5e9c496a4df79e7aaed34ee7cc76af5b18
parentfbf42f1fe2b541ea039ea576f57040eec2ff9480 (diff)
downloadllvm-77124386feb615343afef2740396de1baceb1336.tar.gz
[mlir][tensor] Add transform to make tensor.pad loop-independent
Add a transform to make `tensor.pad` and `tensor.empty` ops independent of SCF loop IVs. Such ops can then be hoisted. E.g.: ``` scf.for %iv = %lb to %ub step %step { %high = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%i)[%ub] %p = tensor.pad %t low[5] high[%high] ... ... } ``` Is transformed to: ``` %high_new = affine.apply affine_map<()[s0, s1] -> (-s0 + s1)> ()[%lb, %ub] %p_hoistable = tensor.pad %t low[5] high[%high_new] %dim = tensor.dim %t, %c0 %size = affine.apply affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 + 5)>(%iv)[%ub, %dim] %slice = tensor.extract_slice %p_hoistable [0] [%size] [1] ``` Differential Revision: https://reviews.llvm.org/D143910
-rw-r--r--mlir/include/mlir/Dialect/Affine/Transforms/Transforms.h14
-rw-r--r--mlir/include/mlir/Dialect/Tensor/CMakeLists.txt1
-rw-r--r--mlir/include/mlir/Dialect/Tensor/TransformOps/CMakeLists.txt6
-rw-r--r--mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h8
-rw-r--r--mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td64
-rw-r--r--mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h39
-rw-r--r--mlir/include/mlir/InitAllDialects.h2
-rw-r--r--mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h25
-rw-r--r--mlir/lib/Dialect/Affine/Transforms/ReifyValueBounds.cpp9
-rw-r--r--mlir/lib/Dialect/Tensor/TransformOps/CMakeLists.txt6
-rw-r--r--mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp81
-rw-r--r--mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt3
-rw-r--r--mlir/lib/Dialect/Tensor/Transforms/IndependenceTransforms.cpp136
-rw-r--r--mlir/lib/Interfaces/ValueBoundsOpInterface.cpp36
-rw-r--r--mlir/test/Dialect/Tensor/transform-op-make-loop-independent.mlir151
-rw-r--r--utils/bazel/llvm-project-overlay/mlir/BUILD.bazel38
16 files changed, 611 insertions, 8 deletions
diff --git a/mlir/include/mlir/Dialect/Affine/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Affine/Transforms/Transforms.h
index 02938d1f7e6c..8e840e744064 100644
--- a/mlir/include/mlir/Dialect/Affine/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Affine/Transforms/Transforms.h
@@ -15,9 +15,11 @@
#define MLIR_DIALECT_AFFINE_TRANSFORMS_TRANSFORMS_H
#include "mlir/Interfaces/ValueBoundsOpInterface.h"
+#include "mlir/Support/LLVM.h"
#include "mlir/Support/LogicalResult.h"
namespace mlir {
+class AffineMap;
class Location;
class OpBuilder;
class OpFoldResult;
@@ -85,6 +87,18 @@ FailureOr<OpFoldResult> reifyShapedValueDimBound(
ValueBoundsConstraintSet::StopConditionFn stopCondition = nullptr,
bool closedUB = false);
+/// Materialize an already computed bound with Affine dialect ops.
+///
+/// * `ValueBoundsOpInterface::computeBound` computes bounds but does not
+/// create IR. It is dialect independent.
+/// * `materializeComputedBound` materializes computed bounds with Affine
+/// dialect ops.
+/// * `reifyIndexValueBound`/`reifyShapedValueDimBound` are a combination of
+/// the two functions mentioned above.
+OpFoldResult materializeComputedBound(
+ OpBuilder &b, Location loc, AffineMap boundMap,
+ ArrayRef<std::pair<Value, std::optional<int64_t>>> mapOperands);
+
} // namespace affine
} // namespace mlir
diff --git a/mlir/include/mlir/Dialect/Tensor/CMakeLists.txt b/mlir/include/mlir/Dialect/Tensor/CMakeLists.txt
index 9f57627c321f..cb1e9d01821a 100644
--- a/mlir/include/mlir/Dialect/Tensor/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/Tensor/CMakeLists.txt
@@ -1,2 +1,3 @@
add_subdirectory(IR)
add_subdirectory(Transforms)
+add_subdirectory(TransformOps)
diff --git a/mlir/include/mlir/Dialect/Tensor/TransformOps/CMakeLists.txt b/mlir/include/mlir/Dialect/Tensor/TransformOps/CMakeLists.txt
new file mode 100644
index 000000000000..bb9f703602dc
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Tensor/TransformOps/CMakeLists.txt
@@ -0,0 +1,6 @@
+set(LLVM_TARGET_DEFINITIONS TensorTransformOps.td)
+mlir_tablegen(TensorTransformOps.h.inc -gen-op-decls)
+mlir_tablegen(TensorTransformOps.cpp.inc -gen-op-defs)
+add_public_tablegen_target(MLIRTensorTransformOpsIncGen)
+
+add_mlir_doc(TensorTransformOps TensorTransformOps Dialects/ -gen-op-doc)
diff --git a/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h
index d1b14d206cd7..c735700b7738 100644
--- a/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h
+++ b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h
@@ -11,10 +11,13 @@
#include "mlir/Dialect/PDL/IR/PDLTypes.h"
#include "mlir/Dialect/Transform/IR/TransformOps.h"
+#include "mlir/Dialect/Transform/IR/TransformTypes.h"
#include "mlir/IR/OpImplementation.h"
#include "mlir/IR/PatternMatch.h"
namespace mlir {
+class DialectRegistry;
+
namespace tensor {
/// A specialized TrackingListener for transform ops that operate on tensor IR.
@@ -29,7 +32,12 @@ protected:
ValueRange newValues) const override;
};
+void registerTransformDialectExtension(DialectRegistry &registry);
+
} // namespace tensor
} // namespace mlir
+#define GET_OP_CLASSES
+#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h.inc"
+
#endif // MLIR_DIALECT_TENSOR_TRANSFORMOPS_TENSORTRANSFORMOPS_H
diff --git a/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td
new file mode 100644
index 000000000000..42be8821addc
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td
@@ -0,0 +1,64 @@
+//===- TensorTransformOps.td - Tensor transformation ops ---*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TENSOR_TRANSFORM_OPS
+#define TENSOR_TRANSFORM_OPS
+
+include "mlir/Dialect/PDL/IR/PDLTypes.td"
+include "mlir/Dialect/Transform/IR/TransformDialect.td"
+include "mlir/Dialect/Transform/IR/TransformInterfaces.td"
+include "mlir/Dialect/Transform/IR/TransformTypes.td"
+include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/IR/OpBase.td"
+
+def Transform_TensorPadOp : Transform_ConcreteOpType<"tensor.pad">;
+
+def MakeLoopIndependentOp
+ : Op<Transform_Dialect, "tensor.make_loop_independent",
+ [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
+ TransformOpInterface, TransformEachOpTrait]> {
+ let description = [{
+ Rewrite the targeted ops such that their index-typed operands no longer
+ depend on any loop induction variable of the `num_loops` enclosing `scf.for`
+ loops. I.e., compute an upper bound that is independent of any such loop IV
+ for every tensor dimension. The transformed op could then be hoisted from
+ the `num_loops` enclosing loops. To preserve the original semantics, place a
+ `tensor.extract_slice` inside the loop.
+
+ Currently supported operations are:
+ - tensor.empty: Replaced with a new tensor.empty with upper bound sizes,
+ followed by a tensor.extract_slice.
+ - tensor.pad: Replaced by an upper bound padding, followed by a
+ tensor.extract_slice.
+
+ #### Return modes
+
+ This operation fails if at least one induction variable could not be
+ eliminated. In case the targeted op is already independent of induction
+ variables, this transform succeeds and returns the unmodified target op.
+
+ Otherwise, the returned handle points to a subset of the produced ops:
+ - tensor.empty: The returned handle points to the tensor.extract_slice op.
+ - tensor.pad: The returned handle points to the tensor.extract_slice op.
+
+ This transform op consumes the target handle and produces a result handle.
+ }];
+
+ let arguments = (ins PDL_Operation:$target, I64Attr:$num_loops);
+ let results = (outs PDL_Operation:$transformed);
+ let assemblyFormat = "$target attr-dict";
+
+ let extraClassDeclaration = [{
+ ::mlir::DiagnosedSilenceableFailure applyToOne(
+ ::mlir::Operation *target,
+ ::mlir::transform::ApplyToEachResultList &results,
+ ::mlir::transform::TransformState &state);
+ }];
+}
+
+#endif // TENSOR_TRANSFORM_OPS
diff --git a/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h
index a3b5abf08fd7..9922dc8358ac 100644
--- a/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h
@@ -61,6 +61,45 @@ void populateFoldTensorEmptyPatterns(RewritePatternSet &patterns);
/// respectively.
void populateFoldIntoPackAndUnpackPatterns(RewritePatternSet &patterns);
+//===----------------------------------------------------------------------===//
+// Transform helpers
+//===----------------------------------------------------------------------===//
+
+/// Build a new tensor::PadOp with low/high padding that is independent of all
+/// given independencies. If the op is already independent of all
+/// independencies, the same PadOp result is returned.
+///
+/// Failure indicates that no suitable upper bound for low/high padding could
+/// be found.
+///
+/// Example:
+/// scf.for %iv = %lb to %ub step %step {
+/// %high = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%iv)[%ub]
+/// %p = tensor.pad %t low[5] high[%high] ...
+/// ...
+/// }
+///
+/// The function builds IR such as:
+/// %high_new = affine.apply affine_map<()[s0, s1] -> (-s0 + s1)> ()[%lb, %ub]
+/// %p_hoistable = tensor.pad %t low[5] high[%high_new]
+/// %dim = tensor.dim %t, %c0
+/// %size = affine.apply affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 + 5)>
+/// (%iv)[%ub, %dim]
+/// %slice = tensor.extract_slice %p_hoistable [0] [%size] [1]
+///
+/// The slice is returned.
+FailureOr<Value> buildIndependentOp(OpBuilder &b, tensor::PadOp padOp,
+ ValueRange independencies);
+
+/// Build a new tensor::EmptyOp whose dynamic sizes are independent of all
+/// given independencies. If the op is already independent of all
+/// independencies, the same EmptyOp result is returned.
+///
+/// Failure indicates that no suitable upper bound for the dynamic sizes could
+/// be found.
+FailureOr<Value> buildIndependentOp(OpBuilder &b, tensor::EmptyOp emptyOp,
+ ValueRange independencies);
+
} // namespace tensor
} // namespace mlir
diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h
index 6f78babc6f33..560b683328dc 100644
--- a/mlir/include/mlir/InitAllDialects.h
+++ b/mlir/include/mlir/InitAllDialects.h
@@ -71,6 +71,7 @@
#include "mlir/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.h"
#include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"
#include "mlir/Dialect/Tensor/IR/ValueBoundsOpInterfaceImpl.h"
+#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h"
#include "mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h"
#include "mlir/Dialect/Tosa/IR/TosaOps.h"
#include "mlir/Dialect/Transform/IR/TransformDialect.h"
@@ -132,6 +133,7 @@ inline void registerAllDialects(DialectRegistry &registry) {
linalg::registerTransformDialectExtension(registry);
memref::registerTransformDialectExtension(registry);
scf::registerTransformDialectExtension(registry);
+ tensor::registerTransformDialectExtension(registry);
vector::registerTransformDialectExtension(registry);
// Register all external models.
diff --git a/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h b/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h
index b4d070a42d98..ac71b73ae1e3 100644
--- a/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h
+++ b/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h
@@ -114,12 +114,25 @@ public:
/// Compute a bound in terms of the values/dimensions in `dependencies`. The
/// computed bound consists of only constant terms and dependent values (or
/// dimension sizes thereof).
- static LogicalResult computeBound(AffineMap &resultMap,
- ValueDimList &mapOperands,
- presburger::BoundType type, Value value,
- std::optional<int64_t> dim,
- ValueDimList dependencies,
- bool closedUB = false);
+ static LogicalResult
+ computeDependentBound(AffineMap &resultMap, ValueDimList &mapOperands,
+ presburger::BoundType type, Value value,
+ std::optional<int64_t> dim, ValueDimList dependencies,
+ bool closedUB = false);
+
+ /// Compute a bound that is independent of all values in `independencies`.
+ ///
+ /// Independencies are the opposite of dependencies. The computed bound does
+ /// not contain any SSA values that are part of `independencies`. E.g., this
+ /// function can be used to make ops hoistable from loops. To that end, ops
+ /// must be made independent of loop induction variables (in the case of "for"
+ /// loops). Loop induction variables are the independencies; they may not
+ /// appear in the computed bound.
+ static LogicalResult
+ computeIndependentBound(AffineMap &resultMap, ValueDimList &mapOperands,
+ presburger::BoundType type, Value value,
+ std::optional<int64_t> dim, ValueRange independencies,
+ bool closedUB = false);
/// Compute a constant bound for the given index-typed value or shape
/// dimension size.
diff --git a/mlir/lib/Dialect/Affine/Transforms/ReifyValueBounds.cpp b/mlir/lib/Dialect/Affine/Transforms/ReifyValueBounds.cpp
index 0efe31c6cb28..4990229dfd3c 100644
--- a/mlir/lib/Dialect/Affine/Transforms/ReifyValueBounds.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/ReifyValueBounds.cpp
@@ -19,7 +19,7 @@ using namespace mlir::affine;
static FailureOr<OpFoldResult>
reifyValueBound(OpBuilder &b, Location loc, presburger::BoundType type,
Value value, std::optional<int64_t> dim,
- function_ref<bool(Value, std::optional<int64_t>)> stopCondition,
+ ValueBoundsConstraintSet::StopConditionFn stopCondition,
bool closedUB) {
// Compute bound.
AffineMap boundMap;
@@ -28,6 +28,13 @@ reifyValueBound(OpBuilder &b, Location loc, presburger::BoundType type,
boundMap, mapOperands, type, value, dim, stopCondition, closedUB)))
return failure();
+ // Reify bound.
+ return affine::materializeComputedBound(b, loc, boundMap, mapOperands);
+}
+
+OpFoldResult affine::materializeComputedBound(
+ OpBuilder &b, Location loc, AffineMap boundMap,
+ ArrayRef<std::pair<Value, std::optional<int64_t>>> mapOperands) {
// Materialize tensor.dim/memref.dim ops.
SmallVector<Value> operands;
for (auto valueDim : mapOperands) {
diff --git a/mlir/lib/Dialect/Tensor/TransformOps/CMakeLists.txt b/mlir/lib/Dialect/Tensor/TransformOps/CMakeLists.txt
index e8be901fe864..be1a5ddf7e7a 100644
--- a/mlir/lib/Dialect/Tensor/TransformOps/CMakeLists.txt
+++ b/mlir/lib/Dialect/Tensor/TransformOps/CMakeLists.txt
@@ -4,9 +4,15 @@ add_mlir_dialect_library(MLIRTensorTransformOps
ADDITIONAL_HEADER_DIRS
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Tensor/TransformOps
+ DEPENDS
+ MLIRTensorTransformOpsIncGen
+
LINK_LIBS PUBLIC
+ MLIRAffineDialect
MLIRIR
MLIRPDLDialect
+ MLIRSCFDialect
MLIRTensorDialect
+ MLIRTensorTransforms
MLIRTransformDialect
)
diff --git a/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp b/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp
index 01e1a35e76bc..4394465d4bdd 100644
--- a/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp
+++ b/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp
@@ -8,8 +8,12 @@
#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/Tensor/Transforms/Transforms.h"
#include "mlir/Dialect/Transform/IR/TransformDialect.h"
+#include "mlir/Dialect/Transform/IR/TransformInterfaces.h"
#include "llvm/ADT/TypeSwitch.h"
using namespace mlir;
@@ -49,3 +53,80 @@ tensor::TrackingListener::findReplacementOp(Operation *op,
return nullptr;
}
+
+//===----------------------------------------------------------------------===//
+// MakeLoopIndependentOp
+//===----------------------------------------------------------------------===//
+
+/// Make `target` independent of the induction variables of its `num_loops`
+/// closest enclosing `scf.for` loops.
+///
+/// A silenceable error (with a note attached to `target`) is emitted if fewer
+/// than `num_loops` enclosing `scf.for` ops exist, if `target` is neither a
+/// `tensor.pad` nor a `tensor.empty`, or if no loop-independent replacement
+/// could be built. On success, the op that defines the replacement value is
+/// appended to `results`; that is `target` itself if nothing had to change.
+DiagnosedSilenceableFailure transform::MakeLoopIndependentOp::applyToOne(
+    Operation *target, transform::ApplyToEachResultList &results,
+    transform::TransformState &state) {
+  // Gather IVs.
+  SmallVector<Value> ivs;
+  Operation *nextOp = target;
+  for (uint64_t i = 0, e = getNumLoops(); i < e; ++i) {
+    nextOp = nextOp->getParentOfType<scf::ForOp>();
+    if (!nextOp) {
+      DiagnosedSilenceableFailure diag = emitSilenceableError()
+                                         << "could not find " << i
+                                         << "-th enclosing loop";
+      diag.attachNote(target->getLoc()) << "target op";
+      return diag;
+    }
+    ivs.push_back(cast<scf::ForOp>(nextOp).getInductionVar());
+  }
+
+  // Rewrite IR.
+  IRRewriter rewriter(target->getContext());
+  FailureOr<Value> replacement = failure();
+  if (auto padOp = dyn_cast<tensor::PadOp>(target)) {
+    replacement = tensor::buildIndependentOp(rewriter, padOp, ivs);
+  } else if (auto emptyOp = dyn_cast<tensor::EmptyOp>(target)) {
+    replacement = tensor::buildIndependentOp(rewriter, emptyOp, ivs);
+  } else {
+    DiagnosedSilenceableFailure diag = emitSilenceableError()
+                                       << "unsupported target op";
+    diag.attachNote(target->getLoc()) << "target op";
+    return diag;
+  }
+  if (failed(replacement)) {
+    DiagnosedSilenceableFailure diag =
+        emitSilenceableError() << "could not make target op loop-independent";
+    diag.attachNote(target->getLoc()) << "target op";
+    return diag;
+  }
+
+  // `buildIndependentOp` returns the result of `target` itself when the op is
+  // already independent of all IVs. Replacing an op with its own result would
+  // erase it while it still has users, so hand back the unmodified target
+  // instead.
+  Operation *replacementOp = replacement->getDefiningOp();
+  if (replacementOp == target) {
+    results.push_back(target);
+    return DiagnosedSilenceableFailure::success();
+  }
+
+  rewriter.replaceOp(target, *replacement);
+  results.push_back(replacementOp);
+  return DiagnosedSilenceableFailure::success();
+}
+
+//===----------------------------------------------------------------------===//
+// Transform op registration
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// Transform dialect extension for tensor transform ops. On initialization it
+/// declares the affine and tensor dialects as generated dialects and registers
+/// the ops listed in TensorTransformOps.cpp.inc.
+struct TensorTransformDialectExtension
+    : transform::TransformDialectExtension<TensorTransformDialectExtension> {
+  using Base::Base;
+
+  void init() {
+    // Dialects whose ops may be created by the transform ops of this
+    // extension.
+    declareGeneratedDialect<affine::AffineDialect>();
+    declareGeneratedDialect<tensor::TensorDialect>();
+
+    // The op list is expanded from the TableGen-generated include.
+    registerTransformOps<
+#define GET_OP_LIST
+#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.cpp.inc"
+        >();
+  }
+};
+} // namespace
+
+#define GET_OP_CLASSES
+#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.cpp.inc"
+
+/// Add the tensor transform dialect extension to `registry`.
+void mlir::tensor::registerTransformDialectExtension(
+    DialectRegistry &registry) {
+  registry.addExtensions<TensorTransformDialectExtension>();
+}
diff --git a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
index 44579546f7ea..c41e9e9ce683 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
@@ -5,6 +5,7 @@ add_mlir_dialect_library(MLIRTensorTransforms
ExtractSliceFromReshapeUtils.cpp
FoldIntoPackAndUnpackPatterns.cpp
FoldTensorSubsetOps.cpp
+ IndependenceTransforms.cpp
MergeConsecutiveInsertExtractSlicePatterns.cpp
ReshapePatterns.cpp
SwapExtractSliceWithProducerPatterns.cpp
@@ -17,6 +18,7 @@ add_mlir_dialect_library(MLIRTensorTransforms
LINK_LIBS PUBLIC
MLIRAffineDialect
+ MLIRAffineTransforms
MLIRAffineUtils
MLIRArithDialect
MLIRBufferizationDialect
@@ -30,4 +32,5 @@ add_mlir_dialect_library(MLIRTensorTransforms
MLIRTilingInterface
MLIRTransforms
MLIRVectorDialect
+ MLIRValueBoundsOpInterface
)
diff --git a/mlir/lib/Dialect/Tensor/Transforms/IndependenceTransforms.cpp b/mlir/lib/Dialect/Tensor/Transforms/IndependenceTransforms.cpp
new file mode 100644
index 000000000000..721730862d49
--- /dev/null
+++ b/mlir/lib/Dialect/Tensor/Transforms/IndependenceTransforms.cpp
@@ -0,0 +1,136 @@
+//===- IndependenceTransforms.cpp - Make ops independent of values --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Tensor/Transforms/Transforms.h"
+
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/Transforms/Transforms.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/Utils/StaticValueUtils.h"
+#include "mlir/Interfaces/ValueBoundsOpInterface.h"
+
+using namespace mlir;
+using namespace mlir::tensor;
+
+/// Compute a closed upper bound for `ofr` that does not depend on any value in
+/// `independencies` and materialize it with `b`. Attributes are returned
+/// unchanged. Fails if no such bound could be computed.
+static FailureOr<OpFoldResult> makeIndependent(OpBuilder &b, Location loc,
+                                               OpFoldResult ofr,
+                                               ValueRange independencies) {
+  // Static values need no bound computation.
+  if (ofr.is<Attribute>())
+    return ofr;
+
+  AffineMap ubMap;
+  ValueDimList ubOperands;
+  LogicalResult status = ValueBoundsConstraintSet::computeIndependentBound(
+      ubMap, ubOperands, presburger::BoundType::UB, ofr.get<Value>(),
+      /*dim=*/std::nullopt, independencies, /*closedUB=*/true);
+  if (failed(status))
+    return failure();
+
+  return mlir::affine::materializeComputedBound(b, loc, ubMap, ubOperands);
+}
+
+/// Build a replacement for `padOp` whose low/high padding does not depend on
+/// any value in `independencies`.
+///
+/// Every padding amount is replaced by an upper bound that is independent of
+/// `independencies`. A new tensor::PadOp with these bounds is created in front
+/// of `padOp`, followed by a tensor::ExtractSliceOp that uses the reified
+/// result sizes of `padOp`. The slice is returned. If no padding amount
+/// changed, the result of `padOp` itself is returned and no new
+/// PadOp/ExtractSliceOp is created.
+///
+/// Fails if the padding value is not constant, if a bound could not be
+/// computed, or if the result shape of `padOp` could not be reified.
+FailureOr<Value> tensor::buildIndependentOp(OpBuilder &b, tensor::PadOp padOp,
+                                            ValueRange independencies) {
+  OpBuilder::InsertionGuard g(b);
+  b.setInsertionPoint(padOp);
+  Location loc = padOp.getLoc();
+
+  // Non-constant padding not supported.
+  Value constantPadding = padOp.getConstantPaddingValue();
+  if (!constantPadding)
+    return failure();
+
+  // Query the original padding once; it is needed for the bound computation,
+  // the "nothing changed" check and the slice offsets.
+  SmallVector<OpFoldResult> oldMixedLow = padOp.getMixedLowPad();
+  SmallVector<OpFoldResult> oldMixedHigh = padOp.getMixedHighPad();
+
+  SmallVector<OpFoldResult> newMixedLow, newMixedHigh;
+  for (OpFoldResult ofr : oldMixedLow) {
+    auto ub = makeIndependent(b, loc, ofr, independencies);
+    if (failed(ub))
+      return failure();
+    newMixedLow.push_back(*ub);
+  }
+  for (OpFoldResult ofr : oldMixedHigh) {
+    auto ub = makeIndependent(b, loc, ofr, independencies);
+    if (failed(ub))
+      return failure();
+    newMixedHigh.push_back(*ub);
+  }
+
+  // Return existing tensor::PadOp if nothing has changed.
+  if (llvm::equal(oldMixedLow, newMixedLow) &&
+      llvm::equal(oldMixedHigh, newMixedHigh))
+    return padOp.getResult();
+
+  // Create a new tensor::PadOp.
+  auto newPadOp = b.create<PadOp>(
+      loc, padOp.getResultType(), padOp.getSource(), newMixedLow, newMixedHigh,
+      constantPadding, padOp.getNofold(), /*attrs=*/ArrayRef<NamedAttribute>{});
+
+  // Create a tensor::ExtractSliceOp.
+  // Reify the result sizes of the old tensor::PadOp.
+  ReifiedRankedShapedTypeDims reifiedSizes;
+  // The interface is used unconditionally below, so use `cast` (which asserts
+  // on mismatch) rather than an unchecked `dyn_cast`.
+  auto reifyShapedTypeInterface =
+      cast<ReifyRankedShapedTypeOpInterface>(padOp.getOperation());
+  if (failed(reifyShapedTypeInterface.reifyResultShapes(b, reifiedSizes)))
+    return failure();
+  SmallVector<OpFoldResult> offsets, sizes, strides;
+  for (int64_t i = 0, e = padOp.getResultType().getRank(); i < e; ++i) {
+    // offset = ub(low_padding) - low_padding
+    OpFoldResult prevLow = oldMixedLow[i];
+    if (prevLow.is<Attribute>()) {
+      offsets.push_back(b.getIndexAttr(0));
+    } else {
+      // The new bound is an OpFoldResult and may be an attribute even though
+      // the old low padding is an SSA value. Build the subtraction on
+      // OpFoldResults instead of assuming both operands are Values.
+      offsets.push_back(affine::makeComposedFoldedAffineApply(
+          b, loc, b.getAffineDimExpr(0) - b.getAffineDimExpr(1),
+          {newMixedLow[i], prevLow}));
+    }
+    // size = reified result size
+    if (!padOp.getResultType().isDynamicDim(i)) {
+      sizes.push_back(b.getIndexAttr(padOp.getResultType().getDimSize(i)));
+    } else {
+      sizes.push_back(reifiedSizes[0][i]);
+    }
+    // stride = 1
+    strides.push_back(b.getIndexAttr(1));
+  }
+
+  return b.create<ExtractSliceOp>(loc, newPadOp, offsets, sizes, strides)
+      .getResult();
+}
+
+/// Build a replacement for `emptyOp` whose sizes do not depend on any value in
+/// `independencies`.
+///
+/// Each size is replaced by an independent upper bound. A new tensor::EmptyOp
+/// with these bounds is created in front of `emptyOp`, followed by a
+/// tensor::ExtractSliceOp (offsets 0, strides 1) with the original sizes. The
+/// slice is returned. If no size changed, the result of `emptyOp` itself is
+/// returned. Fails if a bound could not be computed.
+FailureOr<Value> tensor::buildIndependentOp(OpBuilder &b,
+                                            tensor::EmptyOp emptyOp,
+                                            ValueRange independencies) {
+  OpBuilder::InsertionGuard guard(b);
+  b.setInsertionPoint(emptyOp);
+  Location loc = emptyOp.getLoc();
+
+  // Compute an independent upper bound for every size.
+  SmallVector<OpFoldResult> boundedSizes;
+  for (OpFoldResult size : emptyOp.getMixedSizes()) {
+    FailureOr<OpFoldResult> bound =
+        makeIndependent(b, loc, size, independencies);
+    if (failed(bound))
+      return failure();
+    boundedSizes.push_back(*bound);
+  }
+
+  // Nothing to do if all sizes were already independent.
+  if (llvm::equal(emptyOp.getMixedSizes(), boundedSizes))
+    return emptyOp.getResult();
+
+  // Allocate with the upper bound sizes.
+  Value boundedEmpty =
+      b.create<EmptyOp>(loc, boundedSizes, emptyOp.getType().getElementType());
+
+  // Slice out the originally requested sizes.
+  const size_t rank = boundedSizes.size();
+  SmallVector<OpFoldResult> zeroOffsets(rank, b.getIndexAttr(0));
+  SmallVector<OpFoldResult> unitStrides(rank, b.getIndexAttr(1));
+  auto sliceOp = b.create<ExtractSliceOp>(
+      loc, boundedEmpty, zeroOffsets, emptyOp.getMixedSizes(), unitStrides);
+  return sliceOp.getResult();
+}
diff --git a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp
index 597bc7d99172..95fb785defdd 100644
--- a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp
+++ b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp
@@ -356,7 +356,7 @@ LogicalResult ValueBoundsConstraintSet::computeBound(
return success();
}
-LogicalResult ValueBoundsConstraintSet::computeBound(
+LogicalResult ValueBoundsConstraintSet::computeDependentBound(
AffineMap &resultMap, ValueDimList &mapOperands, presburger::BoundType type,
Value value, std::optional<int64_t> dim, ValueDimList dependencies,
bool closedUB) {
@@ -368,6 +368,40 @@ LogicalResult ValueBoundsConstraintSet::computeBound(
closedUB);
}
+LogicalResult ValueBoundsConstraintSet::computeIndependentBound(
+    AffineMap &resultMap, ValueDimList &mapOperands, presburger::BoundType type,
+    Value value, std::optional<int64_t> dim, ValueRange independencies,
+    bool closedUB) {
+  // A value is independent if neither the value itself nor any value reachable
+  // through the operands of its defining ops (reverse use-def chain) is
+  // contained in `independencies`.
+  auto isIndependent = [&](Value root) {
+    DenseSet<Value> seen;
+    SmallVector<Value> stack;
+    stack.push_back(root);
+    while (!stack.empty()) {
+      Value current = stack.pop_back_val();
+      // Skip values that were already processed.
+      if (!seen.insert(current).second)
+        continue;
+      if (llvm::is_contained(independencies, current))
+        return false;
+      // TODO: DominanceInfo could be used to stop the traversal early.
+      // Values without a defining op have no operands to follow.
+      if (Operation *definingOp = current.getDefiningOp())
+        stack.append(definingOp->getOperands().begin(),
+                     definingOp->getOperands().end());
+    }
+    return true;
+  };
+
+  // Stop the bound computation at independent values, so that the bound is
+  // expressed in terms of such values only.
+  auto stopAtIndependentValue = [&](Value v, std::optional<int64_t> d) {
+    return isIndependent(v);
+  };
+  return computeBound(resultMap, mapOperands, type, value, dim,
+                      stopAtIndependentValue, closedUB);
+}
+
FailureOr<int64_t> ValueBoundsConstraintSet::computeConstantBound(
presburger::BoundType type, Value value, std::optional<int64_t> dim,
StopConditionFn stopCondition, bool closedUB) {
diff --git a/mlir/test/Dialect/Tensor/transform-op-make-loop-independent.mlir b/mlir/test/Dialect/Tensor/transform-op-make-loop-independent.mlir
new file mode 100644
index 000000000000..18a99c5c437b
--- /dev/null
+++ b/mlir/test/Dialect/Tensor/transform-op-make-loop-independent.mlir
@@ -0,0 +1,151 @@
+// RUN: mlir-opt %s -allow-unregistered-dialect \
+// RUN: -test-transform-dialect-interpreter -canonicalize \
+// RUN: -split-input-file -verify-diagnostics | FileCheck %s
+
+// This is a test case where "high" padding depends on the IV.
+
+// CHECK: #[[$map:.*]] = affine_map<()[s0, s1] -> (s0 - s1)>
+// CHECK: #[[$map1:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 + 5)>
+// CHECK-LABEL: func @make_pad_loop_independent_1(
+// CHECK-SAME: %[[lb:.*]]: index, %[[ub:.*]]: index, %[[step:.*]]: index,
+// CHECK-SAME: %[[t:.*]]: tensor<?xf32>
+func.func @make_pad_loop_independent_1(%lb: index, %ub: index, %step: index,
+ %t: tensor<?xf32>, %f: f32) {
+ // CHECK: scf.for %[[iv:.*]] = %[[lb]] to %[[ub]]
+ scf.for %i = %lb to %ub step %step {
+ // CHECK: %[[high:.*]] = affine.apply #[[$map]]()[%[[ub]], %[[lb]]]
+ // CHECK: %[[padded:.*]] = tensor.pad %[[t]] low[5] high[%[[high]]]
+ // CHECK: %[[dim:.*]] = tensor.dim %[[t]]
+ // CHECK: %[[size:.*]] = affine.apply #[[$map1]](%[[iv]])[%[[ub]], %[[dim]]]
+ // CHECK: %[[replacement:.*]] = tensor.extract_slice %[[padded]][0] [%[[size]]] [1]
+ %high = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%i)[%ub]
+ %p = tensor.pad %t low[5] high[%high] {
+ ^bb0(%arg1: index):
+ tensor.yield %f : f32
+ } : tensor<?xf32> to tensor<?xf32>
+ // CHECK: "dummy.some_use"(%[[replacement]])
+ "dummy.some_use"(%p) : (tensor<?xf32>) -> ()
+ }
+ return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+ %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
+ %1 = transform.tensor.make_loop_independent %0 {num_loops = 1}
+}
+
+// -----
+
+// This is a test case where "low" padding depends on the IV.
+
+// CHECK: #[[$map:.*]] = affine_map<()[s0, s1] -> (s0 - s1)>
+// CHECK: #[[$map1:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 + 5)>
+// CHECK: #[[$map2:.*]] = affine_map<(d0)[s0] -> (d0 - s0)>
+// CHECK-LABEL: func @make_pad_loop_independent_1(
+// CHECK-SAME: %[[lb:.*]]: index, %[[ub:.*]]: index, %[[step:.*]]: index,
+// CHECK-SAME: %[[t:.*]]: tensor<?xf32>
+func.func @make_pad_loop_independent_1(%lb: index, %ub: index, %step: index,
+ %t: tensor<?xf32>, %f: f32) {
+ // CHECK: scf.for %[[iv:.*]] = %[[lb]] to %[[ub]]
+ scf.for %i = %lb to %ub step %step {
+ // CHECK: %[[low:.*]] = affine.apply #[[$map]]()[%[[ub]], %[[lb]]]
+ // CHECK: %[[padded:.*]] = tensor.pad %[[t]] low[%[[low]]] high[5]
+ // CHECK: %[[dim:.*]] = tensor.dim %[[t]]
+ // CHECK: %[[size:.*]] = affine.apply #[[$map1]](%[[iv]])[%[[ub]], %[[dim]]]
+ // CHECK: %[[offset:.*]] = affine.apply #[[$map2]](%[[iv]])[%[[lb]]]
+ // CHECK: %[[replacement:.*]] = tensor.extract_slice %[[padded]][%[[offset]]] [%[[size]]] [1]
+ %low = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%i)[%ub]
+ %p = tensor.pad %t low[%low] high[5] {
+ ^bb0(%arg1: index):
+ tensor.yield %f : f32
+ } : tensor<?xf32> to tensor<?xf32>
+ // CHECK: "dummy.some_use"(%[[replacement]])
+ "dummy.some_use"(%p) : (tensor<?xf32>) -> ()
+ }
+ return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+ %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
+ %1 = transform.tensor.make_loop_independent %0 {num_loops = 1}
+}
+
+// -----
+
+// CHECK: #[[$map:.*]] = affine_map<()[s0] -> (s0 * 2 - 2)>
+// CHECK-LABEL: func @two_loops(
+func.func @two_loops(%lb: index, %ub: index, %step: index,
+ %t: tensor<?xf32>, %f: f32) {
+ scf.for %i = %lb to %ub step %step {
+ scf.for %j = %lb to %ub step %step {
+ // CHECK: affine.apply #map()[%{{.*}}]
+ %low = affine.apply affine_map<(d0, d1)[] -> (d0 + d1)> (%i, %j)[]
+ %p = tensor.pad %t low[%low] high[5] {
+ ^bb0(%arg1: index):
+ tensor.yield %f : f32
+ } : tensor<?xf32> to tensor<?xf32>
+ "dummy.some_use"(%p) : (tensor<?xf32>) -> ()
+ }
+ }
+ return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+ %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
+ %1 = transform.tensor.make_loop_independent %0 {num_loops = 2}
+}
+
+// -----
+
+func.func @not_enough_loops(%lb: index, %ub: index, %step: index,
+ %t: tensor<?xf32>, %f: f32) {
+ scf.for %i = %lb to %ub step %step {
+ scf.for %j = %lb to %ub step %step {
+ %low = affine.apply affine_map<(d0, d1)[] -> (d0 + d1)> (%i, %j)[]
+ // expected-note@below {{target op}}
+ %p = tensor.pad %t low[%low] high[5] {
+ ^bb0(%arg1: index):
+ tensor.yield %f : f32
+ } : tensor<?xf32> to tensor<?xf32>
+ "dummy.some_use"(%p) : (tensor<?xf32>) -> ()
+ }
+ }
+ return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+ %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
+ // expected-error@below {{could not find 2-th enclosing loop}}
+ %1 = transform.tensor.make_loop_independent %0 {num_loops = 3}
+}
+
+// -----
+
+// CHECK: #[[$map:.*]] = affine_map<(d0)[s0] -> (-d0 + s0)>
+// CHECK: #[[$map1:.*]] = affine_map<()[s0, s1] -> (s0 - s1)>
+// CHECK-LABEL: func @make_empty_loop_independent(
+// CHECK-SAME: %[[lb:.*]]: index, %[[ub:.*]]: index, %[[step:.*]]: index)
+func.func @make_empty_loop_independent(%lb: index, %ub: index, %step: index) {
+ // CHECK: scf.for %[[iv:.*]] = %[[lb]] to %[[ub]]
+ scf.for %i = %lb to %ub step %step {
+ // CHECK: %[[slice_sz:.*]] = affine.apply #[[$map]](%[[iv]])[%[[ub]]]
+ // CHECK: %[[empty_sz:.*]] = affine.apply #[[$map1]]()[%[[ub]], %[[lb]]]
+ // CHECK: %[[empty:.*]] = tensor.empty(%[[empty_sz]]) : tensor<?xf32>
+ // CHECK: %[[replacement:.*]] = tensor.extract_slice %[[empty]][0] [%[[slice_sz]]] [1]
+ %sz = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%i)[%ub]
+ %empty = tensor.empty(%sz) : tensor<?xf32>
+ // CHECK: "dummy.some_use"(%[[replacement]])
+ "dummy.some_use"(%empty) : (tensor<?xf32>) -> ()
+ }
+ return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+ %0 = transform.structured.match ops{["tensor.empty"]} in %arg1 : (!pdl.operation) -> !pdl.operation
+ %1 = transform.tensor.make_loop_independent %0 {num_loops = 1}
+}
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index c941face6f43..dfa8f43ab439 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -5808,6 +5808,7 @@ cc_library(
includes = ["include"],
deps = [
":AffineDialect",
+ ":AffineTransforms",
":AffineUtils",
":ArithDialect",
":ArithUtils",
@@ -5824,20 +5825,57 @@ cc_library(
":TensorPassIncGen",
":TilingInterface",
":Transforms",
+ ":ValueBoundsOpInterface",
":VectorDialect",
"//llvm:Support",
],
)
+td_library(
+ name = "TensorTransformOpsTdFiles",
+ srcs = [
+ "include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td",
+ ],
+ includes = ["include"],
+ deps = [
+ ":PDLDialect",
+ ":TransformDialectTdFiles",
+ ],
+)
+
+gentbl_cc_library(
+ name = "TensorTransformOpsIncGen",
+ strip_include_prefix = "include",
+ tbl_outs = [
+ (
+ ["-gen-op-decls"],
+ "include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h.inc",
+ ),
+ (
+ ["-gen-op-defs"],
+ "include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.cpp.inc",
+ ),
+ ],
+ tblgen = ":mlir-tblgen",
+ td_file = "include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td",
+ deps = [
+ ":TensorTransformOpsTdFiles",
+ ],
+)
+
cc_library(
name = "TensorTransformOps",
srcs = glob(["lib/Dialect/Tensor/TransformOps/*.cpp"]),
hdrs = glob(["include/mlir/Dialect/Tensor/TransformOps/*.h"]),
includes = ["include"],
deps = [
+ ":AffineDialect",
":IR",
":PDLDialect",
+ ":SCFDialect",
":TensorDialect",
+ ":TensorTransformOpsIncGen",
+ ":TensorTransforms",
":TransformDialect",
"//llvm:Support",
],