//===-- Passes.td - Sparse tensor pass definition file -----*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_SPARSETENSOR_TRANSFORMS_PASSES
#define MLIR_DIALECT_SPARSETENSOR_TRANSFORMS_PASSES

include "mlir/Pass/PassBase.td"

def PreSparsificationRewrite : Pass<"pre-sparsification-rewrite", "ModuleOp"> {
  let summary = "Applies sparse tensor rewriting rules prior to sparsification";
  let description = [{
    A pass that applies rewriting rules to sparse tensor operations prior
    to running the actual sparsification pass.
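
    One example of such a rule is fusing a multiplication kernel into the
    addition kernel that consumes it, so that sparsification later sees a
    single co-iteration instead of two. A hedged sketch (trait definitions
    and encodings are elided, as elsewhere in this file):

    ```mlir
    // Before: two separate Linalg kernels.
    %tmp = linalg.generic #trait_mul
             ins(%a, %b : tensor<?x?xf64, #SparseMatrix>, tensor<?x?xf64>)
             outs(%t0 : tensor<?x?xf64>) {
      ^bb(%x: f64, %y: f64, %o: f64):
        %0 = arith.mulf %x, %y : f64
        linalg.yield %0 : f64
    } -> tensor<?x?xf64>
    %out = linalg.generic #trait_add
             ins(%tmp, %c : tensor<?x?xf64>, tensor<?x?xf64>)
             outs(%t1 : tensor<?x?xf64>) {
      ^bb(%p: f64, %q: f64, %o: f64):
        %0 = arith.addf %p, %q : f64
        linalg.yield %0 : f64
    } -> tensor<?x?xf64>

    // After: a single linalg.generic whose body performs both the
    // multiplication and the addition.
    ```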
  }];
  let constructor = "mlir::createPreSparsificationRewritePass()";
  let dependentDialects = [
    "arith::ArithDialect",
    "bufferization::BufferizationDialect",
    "linalg::LinalgDialect",
    "memref::MemRefDialect",
    "scf::SCFDialect",
    "sparse_tensor::SparseTensorDialect",
  ];
}

def SparsificationPass : Pass<"sparsification", "ModuleOp"> {
  let summary = "Automatically generate sparse tensor code from sparse tensor types";
  let description = [{
    A pass that implements the core functionality of a **sparse compiler**.
    Each Linalg operation (MLIR's tensor index notation) that operates on
    sparse tensor types is converted into code in which the sparsity is
    explicit, both in the co-iterating loop logic and in the selected
    sparse storage schemes.

    See the `SparseTensor` dialect documentation for more background.

    Example input:

    ```mlir
    #matvec = {
      indexing_maps = [
        affine_map<(i,j) -> (i,j)>, // A
        affine_map<(i,j) -> (j)>,   // b
        affine_map<(i,j) -> (i)>    // x (out)
      ],
      iterator_types = ["parallel", "reduction"],
      doc = "X(i) += A(i,j) * B(j)"
    }

    // Multiply a sparse matrix A with a dense vector b into a dense vector x.
    func.func @kernel_matvec(%arga: tensor<?x?xf64, #SparseMatrix>,
                             %argb: tensor<?xf64>,
                             %argx: tensor<?xf64>) -> tensor<?xf64> {
      %0 = linalg.generic #matvec
        ins(%arga, %argb: tensor<?x?xf64, #SparseMatrix>, tensor<?xf64>)
        outs(%argx: tensor<?xf64>) {
        ^bb(%a: f64, %b: f64, %x: f64):
          %0 = arith.mulf %a, %b : f64
          %1 = arith.addf %x, %0 : f64
          linalg.yield %1 : f64
      } -> tensor<?xf64>
      return %0 : tensor<?xf64>
    }
    ```
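
    A hedged sketch of the kind of code the pass generates for this kernel,
    assuming `#SparseMatrix` is a CSR-style encoding (the names `%rows`,
    `%bmem`, and `%xmem` and the surrounding bufferization are illustrative,
    not verbatim pass output):

    ```mlir
    %pointers = sparse_tensor.pointers %arga {dimension = 1 : index}
       : tensor<?x?xf64, #SparseMatrix> to memref<?xindex>
    %indices = sparse_tensor.indices %arga {dimension = 1 : index}
       : tensor<?x?xf64, #SparseMatrix> to memref<?xindex>
    %values = sparse_tensor.values %arga
       : tensor<?x?xf64, #SparseMatrix> to memref<?xf64>
    scf.for %i = %c0 to %rows step %c1 {
      // Only the stored nonzeros of row i are visited.
      %lo = memref.load %pointers[%i] : memref<?xindex>
      %ip1 = arith.addi %i, %c1 : index
      %hi = memref.load %pointers[%ip1] : memref<?xindex>
      %x0 = memref.load %xmem[%i] : memref<?xf64>
      %sum = scf.for %k = %lo to %hi step %c1 iter_args(%acc = %x0) -> (f64) {
        %j = memref.load %indices[%k] : memref<?xindex>
        %a = memref.load %values[%k] : memref<?xf64>
        %b = memref.load %bmem[%j] : memref<?xf64>
        %m = arith.mulf %a, %b : f64
        %s = arith.addf %acc, %m : f64
        scf.yield %s : f64
      }
      memref.store %sum, %xmem[%i] : memref<?xf64>
    }
    ```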
  }];
  let constructor = "mlir::createSparsificationPass()";
  let dependentDialects = [
    "affine::AffineDialect",
    "arith::ArithDialect",
    "bufferization::BufferizationDialect",
    "LLVM::LLVMDialect",
    "linalg::LinalgDialect",
    "memref::MemRefDialect",
    "scf::SCFDialect",
    "sparse_tensor::SparseTensorDialect",
  ];
  // TODO(57514): These enum options are duplicated in Passes.h.
  let options = [
    Option<"enableIndexReduction", "enable-index-reduction", "bool",
           "false",
           "Enable dependent index reduction based algorithm to handle non-trivial index expressions on sparse inputs (experimental features)">,
    Option<"parallelization", "parallelization-strategy", "mlir::SparseParallelizationStrategy",
           "mlir::SparseParallelizationStrategy::kNone",
           "Set the parallelization strategy", [{llvm::cl::values(
             clEnumValN(mlir::SparseParallelizationStrategy::kNone, "none",
                        "Turn off sparse parallelization."),
             clEnumValN(mlir::SparseParallelizationStrategy::kDenseOuterLoop,
                        "dense-outer-loop",
                        "Enable dense outer loop sparse parallelization."),
             clEnumValN(mlir::SparseParallelizationStrategy::kAnyStorageOuterLoop,
                        "any-storage-outer-loop",
                        "Enable sparse parallelization regardless of storage for the outer loop."),
             clEnumValN(mlir::SparseParallelizationStrategy::kDenseAnyLoop,
                        "dense-any-loop",
                        "Enable dense parallelization for any loop."),
             clEnumValN(mlir::SparseParallelizationStrategy::kAnyStorageAnyLoop,
                        "any-storage-any-loop",
                        "Enable sparse parallelization for any storage and loop."))}]>
  ];
}

def PostSparsificationRewrite : Pass<"post-sparsification-rewrite", "ModuleOp"> {
  let summary = "Applies sparse tensor rewriting rules after sparsification";
  let description = [{
    A pass that applies rewriting rules to sparse tensor operations after
    running the actual sparsification pass.
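
    For instance, with `enable-foreach` set, a `sparse_tensor.foreach`
    traversal is expanded into loops over the stored elements only. A hedged
    sketch (the generated loop nest depends on the chosen encoding):

    ```mlir
    // Before: visit every stored element of a sparse matrix.
    sparse_tensor.foreach in %A : tensor<?x?xf64, #CSR> do {
      ^bb0(%i: index, %j: index, %v: f64):
        "test.use"(%i, %j, %v) : (index, index, f64) -> ()
    }

    // After: scf loops over the pointers/indices/values arrays of the
    // CSR storage, invoking the body once per stored element.
    ```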
  }];
  let constructor = "mlir::createPostSparsificationRewritePass()";
  let dependentDialects = [
    "arith::ArithDialect",
    "bufferization::BufferizationDialect",
    "linalg::LinalgDialect",
    "memref::MemRefDialect",
    "scf::SCFDialect",
    "sparse_tensor::SparseTensorDialect",
  ];
  let options = [
    Option<"enableRuntimeLibrary", "enable-runtime-library", "bool",
           "true", "Enable runtime library for manipulating sparse tensors">,
    Option<"enableForeach", "enable-foreach", "bool",
           "true", "Enable rewriting rules for the foreach operator">,
    Option<"enableConvert", "enable-convert", "bool",
           "true", "Enable rewriting rules for the convert operator">,
  ];
}

def SparseTensorConversionPass : Pass<"sparse-tensor-conversion", "ModuleOp"> {
  let summary = "Convert sparse tensors and primitives to library calls";
  let description = [{
    A pass that converts sparse tensor primitives into calls into a runtime
    support library. Sparse tensor types are converted into opaque pointers
    to the underlying sparse storage schemes.

    The use of opaque pointers together with a runtime support library keeps
    the conversion relatively simple, but at the expense of IR opacity,
    which obscures opportunities for subsequent optimization of the IR.
    An alternative is provided by the SparseTensorCodegen pass.

    Example of the conversion:

    ```mlir
      Before:
        func.func @foo(%arg0: tensor<8x8xf32, #CSR>) -> memref<?xindex> {
          %0 = sparse_tensor.pointers %arg0 {dimension = 1 : index}
             : tensor<8x8xf32, #CSR> to memref<?xindex>
          return %0 : memref<?xindex>
        }

      After:
        func.func @foo(%arg0: !llvm.ptr<i8>) -> memref<?xindex> {
          %c1 = arith.constant 1 : index
          %0 = call @sparsePointers0(%arg0, %c1)
             : (!llvm.ptr<i8>, index) -> memref<?xindex>
          return %0 : memref<?xindex>
        }
    ```
  }];
  let constructor = "mlir::createSparseTensorConversionPass()";
  let dependentDialects = [
    "arith::ArithDialect",
    "bufferization::BufferizationDialect",
    "LLVM::LLVMDialect",
    "linalg::LinalgDialect",
    "memref::MemRefDialect",
    "scf::SCFDialect",
    "sparse_tensor::SparseTensorDialect",
  ];
  let options = [
    Option<"sparseToSparse", "s2s-strategy", "int32_t", "0",
           "Set the strategy for sparse-to-sparse conversion">,
  ];
}

def SparseTensorCodegen : Pass<"sparse-tensor-codegen", "ModuleOp"> {
  let summary = "Convert sparse tensors and primitives to actual code";
  let description = [{
    A pass that converts sparse tensor types and primitives to actual
    compiler-visible buffers and compiler IR that implements these
    primitives on the selected sparse tensor storage schemes.

    This pass provides an alternative to the SparseTensorConversion pass,
    eliminating the dependence on a runtime support library and providing
    many more opportunities for subsequent compiler optimization of the
    generated code.

    Example of the conversion:

    ```mlir
      Before:
        func.func @foo(%arg0: tensor<8x8xf32, #CSR>) -> memref<?xindex> {
          %0 = sparse_tensor.pointers %arg0 {dimension = 1 : index}
             : tensor<8x8xf32, #CSR> to memref<?xindex>
          return %0 : memref<?xindex>
        }

      After:
        func.func @foo(%arg0: memref<2xindex>,
                       %arg1: memref<3xindex>,
                       %arg2: memref<?xindex>,
                       %arg3: memref<?xindex>,
                       %arg4: memref<?xf32>) -> memref<?xindex> {
          return %arg2 : memref<?xindex>
        }
    ```
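
    In this example (a hedged reading of the expansion), the single sparse
    tensor argument becomes explicit buffers for the dimension sizes, the
    memory sizes, and the pointer, index, and value arrays of the CSR
    format, so the `pointers` query reduces to returning one of the
    arguments.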
  }];
  let constructor = "mlir::createSparseTensorCodegenPass()";
  let dependentDialects = [
    "arith::ArithDialect",
    "bufferization::BufferizationDialect",
    "linalg::LinalgDialect",
    "memref::MemRefDialect",
    "scf::SCFDialect",
    "sparse_tensor::SparseTensorDialect",
  ];
  let options = [
    Option<"enableBufferInitialization", "enable-buffer-initialization", "bool",
           "false", "Enable zero-initialization of the memory buffers">,
    Option<"createSparseDeallocs", "create-sparse-deallocs", "bool",
           "true", "Specify if the temporary buffers created by the sparse "
                   "compiler should be deallocated. For compatibility with core "
                   "bufferization passes. "
                   "This option is only used when enable-runtime-library=false. "
                   "See also create-deallocs for BufferizationOption.">,
  ];
}

def SparseBufferRewrite : Pass<"sparse-buffer-rewrite", "ModuleOp"> {
  let summary = "Rewrite sparse primitives on buffers to actual code";
  let description = [{
    A pass that rewrites sparse primitives on buffers into their MLIR
    implementation. For example, the sparse_tensor.sort operator is
    implemented by this pass, as sketched below.
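
    A hedged sketch of the effect (the operand list and the generated
    function name are illustrative, not the exact forms used by the pass):

    ```mlir
    // Before: sort the first %n entries of the index buffer %xs in place.
    sparse_tensor.sort %n, %xs : memref<?xindex>

    // After: the operator is replaced by a call to a sort routine that
    // this pass emits directly as MLIR functions.
    call @_sparse_sort_index(%n, %xs) : (index, memref<?xindex>) -> ()
    ```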
  }];
  let constructor = "mlir::createSparseBufferRewritePass()";
  let dependentDialects = [
    "arith::ArithDialect",
    "linalg::LinalgDialect",
    "memref::MemRefDialect",
    "scf::SCFDialect",
    "sparse_tensor::SparseTensorDialect",
  ];
  let options = [
    Option<"enableBufferInitialization", "enable-buffer-initialization", "bool",
           "false", "Enable zero-initialization of the memory buffers">,
  ];
}

def SparseVectorization : Pass<"sparse-vectorization", "ModuleOp"> {
  let summary = "Vectorizes loops after sparsification";
  let description = [{
    A pass that converts loops after sparsification into vector loops.
    The vector dialect is used as the target to provide an
    architecture-neutral way of exploiting any platform that supports
    SIMD instructions.

    The vector length (viz. `vl`) describes the number of packed data elements
    (e.g. both vector<16xf32> and vector<16xf64> have a vector length of 16 even
    though the actual bitwidths differ). A small multiple of the actual
    vector length supported by the hardware typically results in efficient
    SIMD code, since the backend will map longer vectors to multiple vector
    registers, thereby effectively unrolling an additional level within the
    generated for-loop.
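
    For example, on a target with 512-bit SIMD registers (16 `f32` lanes),
    `vl=32` maps each generated vector<32xf32> operation onto two registers,
    which is exactly this kind of modest unrolling.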

    Example of the conversion:

    ```mlir
      Before:
        %3 = memref.load %2[] : memref<f32>
        %4 = scf.for %arg3 = %c0 to %c1024 step %c1 iter_args(%arg4 = %3) -> (f32) {
          %6 = memref.load %0[%arg3] : memref<?xf32>
          %7 = memref.load %1[%arg3] : memref<1024xf32>
          %8 = arith.mulf %6, %7 : f32
          %9 = arith.addf %arg4, %8 : f32
          scf.yield %9 : f32
        }
        memref.store %4, %2[] : memref<f32>

      After:
        %3 = memref.load %2[] : memref<f32>
        %4 = vector.insertelement %3, %cst[%c0 : index] : vector<32xf32>
        %5 = scf.for %arg3 = %c0 to %c1024 step %c32 iter_args(%arg4 = %4) -> (vector<32xf32>) {
          %8 = vector.load %0[%arg3] : memref<?xf32>, vector<32xf32>
          %9 = vector.load %1[%arg3] : memref<1024xf32>, vector<32xf32>
          %10 = arith.mulf %8, %9 : vector<32xf32>
          %11 = arith.addf %arg4, %10 : vector<32xf32>
          scf.yield %11 : vector<32xf32>
        }
        %6 = vector.reduction <add>, %5 : vector<32xf32> into f32
        memref.store %6, %2[] : memref<f32>
    ```
  }];
  let constructor = "mlir::createSparseVectorizationPass()";
  let dependentDialects = [
    "arith::ArithDialect",
    "memref::MemRefDialect",
    "scf::SCFDialect",
    "sparse_tensor::SparseTensorDialect",
    "vector::VectorDialect",
  ];
  let options = [
    Option<"vectorLength", "vl", "int32_t", "0",
           "Set the vector length (use 0 to disable vectorization)">,
    Option<"enableVLAVectorization", "enable-vla-vectorization", "bool",
           "false", "Enable vector length agnostic vectorization">,
    Option<"enableSIMDIndex32", "enable-simd-index32", "bool", "false",
           "Enable i32 indexing into vectors (for efficient gather/scatter)">,
  ];
}

def SparseGPUCodegen : Pass<"sparse-gpu-codegen", "ModuleOp"> {
  let summary = "Generates GPU code during sparsification";
  let description = [{
    Enables the sparse compiler to use GPU acceleration.
  }];
  let constructor = "mlir::createSparseGPUCodegenPass()";
  let dependentDialects = [
    "arith::ArithDialect",
    "bufferization::BufferizationDialect",
    "gpu::GPUDialect",
    "linalg::LinalgDialect",
    "memref::MemRefDialect",
    "scf::SCFDialect",
    "sparse_tensor::SparseTensorDialect",
  ];
  let options = [
    Option<"numThreads", "num_threads", "int32_t", "1024", "Sets the number of GPU threads">,
  ];
}

def StorageSpecifierToLLVM : Pass<"sparse-storage-specifier-to-llvm", "ModuleOp"> {
  let summary = "Lower sparse storage specifer to llvm structure";
  let description = [{
    This pass rewrites sparse tensor storage specifier-related operations
    into the LLVM dialect, and converts the sparse tensor storage specifier
    type into an llvm.struct.

    Example of the conversion:

    ```mlir
      Before:
        %0 = sparse_tensor.storage_specifier.get %arg0 dim_sz at 0
           : !sparse_tensor.storage_specifier<#CSR> to i64

      After:
        %0 = llvm.extractvalue %arg0[0, 0]
           : !llvm.struct<(array<2 x i64>, array<3 x i64>)>
    ```
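
    Here (a hedged reading of the layout) the specifier of the 2-dimensional
    `#CSR` tensor becomes a struct of two `i64` arrays, holding the dimension
    sizes and the sizes of the three storage buffers, and the `dim_sz at 0`
    query reduces to a field extraction.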
  }];
  let constructor = "mlir::createStorageSpecifierToLLVMPass()";
  let dependentDialects = [
    "arith::ArithDialect",
    "LLVM::LLVMDialect",
    "sparse_tensor::SparseTensorDialect",
  ];
}

#endif // MLIR_DIALECT_SPARSETENSOR_TRANSFORMS_PASSES