diff options
author | Michael Kruse <llvm@meinersbur.de> | 2018-09-26 15:21:43 +0000 |
---|---|---|
committer | Michael Kruse <llvm@meinersbur.de> | 2018-09-26 15:21:43 +0000 |
commit | fe7bd34b79e59e2af209890918c01648ce4b9542 (patch) | |
tree | 38900d34b3b0531cc652a51c53b3f578a088e2fd /polly/docs | |
parent | ea4f20c6bef7ee65a820e65f93efe0af97997a14 (diff) | |
download | llvm-fe7bd34b79e59e2af209890918c01648ce4b9542.tar.gz |
Move www/experiments to docs/experiments
llvm-svn: 343118
Diffstat (limited to 'polly/docs')
31 files changed, 2967 insertions, 0 deletions
diff --git a/polly/docs/experiments/matmul/matmul.c b/polly/docs/experiments/matmul/matmul.c new file mode 100644 index 000000000000..49fffc808f3e --- /dev/null +++ b/polly/docs/experiments/matmul/matmul.c @@ -0,0 +1,52 @@ +#include <stdio.h> + +#define N 1536 +float A[N][N]; +float B[N][N]; +float C[N][N]; + +void init_array() +{ + int i, j; + + for (i = 0; i < N; i++) { + for (j = 0; j < N; j++) { + A[i][j] = (1+(i*j)%1024)/2.0; + B[i][j] = (1+(i*j)%1024)/2.0; + } + } +} + +void print_array() +{ + int i, j; + + for (i = 0; i < N; i++) { + for (j = 0; j < N; j++) { + fprintf(stdout, "%lf ", C[i][j]); + if (j%80 == 79) fprintf(stdout, "\n"); + } + fprintf(stdout, "\n"); + } +} + +int main() +{ + int i, j, k; + double t_start, t_end; + + init_array(); + + for (i = 0; i < N; i++) { + for (j = 0; j < N; j++) { + C[i][j] = 0; + for (k = 0; k < N; k++) + C[i][j] = C[i][j] + A[i][k] * B[k][j]; + } + } + +#ifdef TEST + print_array(); +#endif + return 0; +} diff --git a/polly/docs/experiments/matmul/matmul.normalopt.exe b/polly/docs/experiments/matmul/matmul.normalopt.exe Binary files differnew file mode 100755 index 000000000000..cdb9e67af454 --- /dev/null +++ b/polly/docs/experiments/matmul/matmul.normalopt.exe diff --git a/polly/docs/experiments/matmul/matmul.normalopt.ll b/polly/docs/experiments/matmul/matmul.normalopt.ll Binary files differnew file mode 100644 index 000000000000..ba792c29f701 --- /dev/null +++ b/polly/docs/experiments/matmul/matmul.normalopt.ll diff --git a/polly/docs/experiments/matmul/matmul.normalopt.s b/polly/docs/experiments/matmul/matmul.normalopt.s new file mode 100644 index 000000000000..079af702a14f --- /dev/null +++ b/polly/docs/experiments/matmul/matmul.normalopt.s @@ -0,0 +1,274 @@ + .file "matmul.normalopt.ll" + .section .rodata.cst8,"aM",@progbits,8 + .align 8 +.LCPI0_0: + .quad 4602678819172646912 # double 0.5 + .text + .globl init_array + .align 16, 0x90 + .type init_array,@function +init_array: # @init_array + .cfi_startproc +# BB#0: # %entry + pushq %rbp +.Ltmp2: + .cfi_def_cfa_offset 16 +.Ltmp3: + .cfi_offset %rbp, -16 + movq %rsp, %rbp +.Ltmp4: + .cfi_def_cfa_register %rbp + xorl %r8d, %r8d + vmovsd .LCPI0_0(%rip), %xmm0 + .align 16, 0x90 +.LBB0_1: # %for.cond1.preheader + # =>This Loop Header: Depth=1 + # Child Loop BB0_2 Depth 2 + xorl %ecx, %ecx + .align 16, 0x90 +.LBB0_2: # %for.body3 + # Parent Loop BB0_1 Depth=1 + # => This Inner Loop Header: Depth=2 + movl %ecx, %edx + imull %r8d, %edx + movl %edx, %esi + sarl $31, %esi + shrl $22, %esi + addl %edx, %esi + andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00 + negl %esi + movq %r8, %rax + shlq $11, %rax + leal 1(%rdx,%rsi), %edi + leaq (%rax,%rax,2), %rsi + leaq 1(%rcx), %rdx + cmpq $1536, %rdx # imm = 0x600 + vcvtsi2sdl %edi, %xmm0, %xmm1 + vmulsd %xmm0, %xmm1, %xmm1 + vcvtsd2ss %xmm1, %xmm1, %xmm1 + vmovss %xmm1, A(%rsi,%rcx,4) + vmovss %xmm1, B(%rsi,%rcx,4) + movq %rdx, %rcx + jne .LBB0_2 +# BB#3: # %for.inc17 + # in Loop: Header=BB0_1 Depth=1 + incq %r8 + cmpq $1536, %r8 # imm = 0x600 + jne .LBB0_1 +# BB#4: # %for.end19 + popq %rbp + ret +.Ltmp5: + .size init_array, .Ltmp5-init_array + .cfi_endproc + + .globl print_array + .align 16, 0x90 + .type print_array,@function +print_array: # @print_array + .cfi_startproc +# BB#0: # %entry + pushq %rbp +.Ltmp9: + .cfi_def_cfa_offset 16 +.Ltmp10: + .cfi_offset %rbp, -16 + movq %rsp, %rbp +.Ltmp11: + .cfi_def_cfa_register %rbp + pushq %r15 + pushq %r14 + pushq %r12 + pushq %rbx +.Ltmp12: + .cfi_offset %rbx, -48 +.Ltmp13: + .cfi_offset %r12, -40 +.Ltmp14: + .cfi_offset %r14, -32 +.Ltmp15: + .cfi_offset %r15, -24 + xorl %r14d, %r14d + movl $C, %r15d + .align 16, 0x90 +.LBB1_1: # %for.cond1.preheader + # =>This Loop Header: Depth=1 + # Child Loop BB1_2 Depth 2 + movq stdout(%rip), %rax + movq %r15, %r12 + xorl %ebx, %ebx + .align 16, 0x90 +.LBB1_2: # %for.body3 + # Parent Loop BB1_1 Depth=1 + # => This Inner Loop Header: Depth=2 + vmovss (%r12), %xmm0 + vcvtss2sd %xmm0, %xmm0, %xmm0 + movq %rax, %rdi + movl $.L.str, %esi + movb $1, %al + callq fprintf + movslq %ebx, %rax + imulq $1717986919, %rax, %rcx # imm = 0x66666667 + movq %rcx, %rdx + shrq $63, %rdx + sarq $37, %rcx + addl %edx, %ecx + imull $80, %ecx, %ecx + subl %ecx, %eax + cmpl $79, %eax + jne .LBB1_4 +# BB#3: # %if.then + # in Loop: Header=BB1_2 Depth=2 + movq stdout(%rip), %rsi + movl $10, %edi + callq fputc +.LBB1_4: # %for.inc + # in Loop: Header=BB1_2 Depth=2 + addq $4, %r12 + incq %rbx + movq stdout(%rip), %rax + cmpq $1536, %rbx # imm = 0x600 + jne .LBB1_2 +# BB#5: # %for.end + # in Loop: Header=BB1_1 Depth=1 + movl $10, %edi + movq %rax, %rsi + callq fputc + addq $6144, %r15 # imm = 0x1800 + incq %r14 + cmpq $1536, %r14 # imm = 0x600 + jne .LBB1_1 +# BB#6: # %for.end12 + popq %rbx + popq %r12 + popq %r14 + popq %r15 + popq %rbp + ret +.Ltmp16: + .size print_array, .Ltmp16-print_array + .cfi_endproc + + .section .rodata.cst8,"aM",@progbits,8 + .align 8 +.LCPI2_0: + .quad 4602678819172646912 # double 0.5 + .text + .globl main + .align 16, 0x90 + .type main,@function +main: # @main + .cfi_startproc +# BB#0: # %entry + pushq %rbp +.Ltmp19: + .cfi_def_cfa_offset 16 +.Ltmp20: + .cfi_offset %rbp, -16 + movq %rsp, %rbp +.Ltmp21: + .cfi_def_cfa_register %rbp + xorl %r8d, %r8d + vmovsd .LCPI2_0(%rip), %xmm0 + .align 16, 0x90 +.LBB2_1: # %for.cond1.preheader.i + # =>This Loop Header: Depth=1 + # Child Loop BB2_2 Depth 2 + xorl %ecx, %ecx + .align 16, 0x90 +.LBB2_2: # %for.body3.i + # Parent Loop BB2_1 Depth=1 + # => This Inner Loop Header: Depth=2 + movl %ecx, %edx + imull %r8d, %edx + movl %edx, %esi + sarl $31, %esi + shrl $22, %esi + addl %edx, %esi + andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00 + negl %esi + movq %r8, %rax + shlq $11, %rax + leal 1(%rdx,%rsi), %edi + leaq (%rax,%rax,2), %rsi + leaq 1(%rcx), %rdx + cmpq $1536, %rdx # imm = 0x600 + vcvtsi2sdl %edi, %xmm0, %xmm1 + vmulsd %xmm0, %xmm1, %xmm1 + vcvtsd2ss %xmm1, %xmm1, %xmm1 + vmovss %xmm1, A(%rsi,%rcx,4) + vmovss %xmm1, B(%rsi,%rcx,4) + movq %rdx, %rcx + jne .LBB2_2 +# BB#3: # %for.inc17.i + # in Loop: Header=BB2_1 Depth=1 + incq %r8 + cmpq $1536, %r8 # imm = 0x600 + jne .LBB2_1 +# BB#4: + xorl %r8d, %r8d + movl $A, %r9d + .align 16, 0x90 +.LBB2_5: # %for.cond1.preheader + # =>This Loop Header: Depth=1 + # Child Loop BB2_6 Depth 2 + # Child Loop BB2_7 Depth 3 + leaq (%r8,%r8,2), %rdx + shlq $11, %rdx + leaq C(%rdx), %rsi + xorl %edi, %edi + .align 16, 0x90 +.LBB2_6: # %for.body3 + # Parent Loop BB2_5 Depth=1 + # => This Loop Header: Depth=2 + # Child Loop BB2_7 Depth 3 + movl $0, (%rsi) + vxorps %xmm0, %xmm0, %xmm0 + movq $-9437184, %rax # imm = 0xFFFFFFFFFF700000 + movq %r9, %rcx + .align 16, 0x90 +.LBB2_7: # %for.body8 + # Parent Loop BB2_5 Depth=1 + # Parent Loop BB2_6 Depth=2 + # => This Inner Loop Header: Depth=3 + vmovss (%rcx), %xmm1 + vmulss B+9437184(%rax,%rdi,4), %xmm1, %xmm1 + vaddss %xmm1, %xmm0, %xmm0 + addq $4, %rcx + addq $6144, %rax # imm = 0x1800 + jne .LBB2_7 +# BB#8: # %for.inc25 + # in Loop: Header=BB2_6 Depth=2 + vmovss %xmm0, (%rsi) + leaq C+4(%rdx,%rdi,4), %rsi + incq %rdi + cmpq $1536, %rdi # imm = 0x600 + jne .LBB2_6 +# BB#9: # %for.inc28 + # in Loop: Header=BB2_5 Depth=1 + addq $6144, %r9 # imm = 0x1800 + incq %r8 + cmpq $1536, %r8 # imm = 0x600 + jne .LBB2_5 +# BB#10: # %for.end30 + xorl %eax, %eax + popq %rbp + ret +.Ltmp22: + .size main, .Ltmp22-main + .cfi_endproc + + .type A,@object # @A + .comm A,9437184,16 + .type B,@object # @B + .comm B,9437184,16 + .type .L.str,@object # @.str + .section .rodata.str1.1,"aMS",@progbits,1 +.L.str: + .asciz "%lf " + .size .L.str, 5 + + .type C,@object # @C + .comm C,9437184,16 + + .section ".note.GNU-stack","",@progbits diff --git a/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.exe b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.exe Binary files differnew file mode 100755 index 000000000000..feb24366d730 --- /dev/null +++ b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.exe diff --git a/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.ll b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.ll Binary files differnew file mode 100644 index 000000000000..593794ef380b --- /dev/null +++ b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.ll diff --git a/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.s b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.s new file mode 100644 index 000000000000..ca87de11704e --- /dev/null +++ b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.s @@ -0,0 +1,754 @@ + .file "matmul.polly.interchanged+tiled+vector+openmp.ll" + .section .rodata.cst8,"aM",@progbits,8 + .align 8 +.LCPI0_0: + .quad 4602678819172646912 # double 0.5 + .text + .globl init_array + .align 16, 0x90 + .type init_array,@function +init_array: # @init_array + .cfi_startproc +# BB#0: # %entry + pushq %rbp +.Ltmp3: + .cfi_def_cfa_offset 16 +.Ltmp4: + .cfi_offset %rbp, -16 + movq %rsp, %rbp +.Ltmp5: + .cfi_def_cfa_register %rbp + pushq %r15 + pushq %r14 + pushq %rbx + subq $24, %rsp +.Ltmp6: + .cfi_offset %rbx, -40 +.Ltmp7: + .cfi_offset %r14, -32 +.Ltmp8: + .cfi_offset %r15, -24 + leaq -32(%rbp), %rsi + movl $init_array.omp_subfn, %edi + xorl %edx, %edx + xorl %ecx, %ecx + movl $1536, %r8d # imm = 0x600 + movl $1, %r9d + callq GOMP_parallel_loop_runtime_start + leaq -40(%rbp), %rdi + leaq -48(%rbp), %rsi + callq GOMP_loop_runtime_next + testb %al, %al + je .LBB0_4 +# BB#1: + leaq -40(%rbp), %r14 + leaq -48(%rbp), %r15 + vmovsd .LCPI0_0(%rip), %xmm1 + .align 16, 0x90 +.LBB0_2: # %omp.loadIVBounds.i + # =>This Loop Header: Depth=1 + # Child Loop BB0_8 Depth 2 + # Child Loop BB0_5 Depth 3 + movq -48(%rbp), %r8 + leaq -1(%r8), %rcx + movq -40(%rbp), %rax + cmpq %rcx, %rax + jg .LBB0_3 +# BB#7: # %polly.loop_preheader4.preheader.i + # in Loop: Header=BB0_2 Depth=1 + addq $-2, %r8 + .align 16, 0x90 +.LBB0_8: # %polly.loop_preheader4.i + # Parent Loop BB0_2 Depth=1 + # => This Loop Header: Depth=2 + # Child Loop BB0_5 Depth 3 + xorl %edx, %edx + .align 16, 0x90 +.LBB0_5: # %polly.loop_header3.i + # Parent Loop BB0_2 Depth=1 + # Parent Loop BB0_8 Depth=2 + # => This Inner Loop Header: Depth=3 + movl %edx, %esi + imull %eax, %esi + movl %esi, %edi + sarl $31, %edi + shrl $22, %edi + addl %esi, %edi + andl $-1024, %edi # imm = 0xFFFFFFFFFFFFFC00 + negl %edi + movq %rax, %rcx + shlq $11, %rcx + leal 1(%rsi,%rdi), %ebx + leaq (%rcx,%rcx,2), %rdi + leaq 1(%rdx), %rsi + cmpq $1536, %rsi # imm = 0x600 + vcvtsi2sdl %ebx, %xmm0, %xmm0 + vmulsd %xmm1, %xmm0, %xmm0 + vcvtsd2ss %xmm0, %xmm0, %xmm0 + vmovss %xmm0, A(%rdi,%rdx,4) + vmovss %xmm0, B(%rdi,%rdx,4) + movq %rsi, %rdx + jne .LBB0_5 +# BB#6: # %polly.loop_exit5.i + # in Loop: Header=BB0_8 Depth=2 + cmpq %r8, %rax + leaq 1(%rax), %rax + jle .LBB0_8 +.LBB0_3: # %omp.checkNext.backedge.i + # in Loop: Header=BB0_2 Depth=1 + movq %r14, %rdi + movq %r15, %rsi + callq GOMP_loop_runtime_next + vmovsd .LCPI0_0(%rip), %xmm1 + testb %al, %al + jne .LBB0_2 +.LBB0_4: # %init_array.omp_subfn.exit + callq GOMP_loop_end_nowait + callq GOMP_parallel_end + addq $24, %rsp + popq %rbx + popq %r14 + popq %r15 + popq %rbp + ret +.Ltmp9: + .size init_array, .Ltmp9-init_array + .cfi_endproc + + .globl print_array + .align 16, 0x90 + .type print_array,@function +print_array: # @print_array + .cfi_startproc +# BB#0: # %entry + pushq %rbp +.Ltmp13: + .cfi_def_cfa_offset 16 +.Ltmp14: + .cfi_offset %rbp, -16 + movq %rsp, %rbp +.Ltmp15: + .cfi_def_cfa_register %rbp + pushq %r15 + pushq %r14 + pushq %r12 + pushq %rbx +.Ltmp16: + .cfi_offset %rbx, -48 +.Ltmp17: + .cfi_offset %r12, -40 +.Ltmp18: + .cfi_offset %r14, -32 +.Ltmp19: + .cfi_offset %r15, -24 + xorl %r14d, %r14d + movl $C, %r15d + .align 16, 0x90 +.LBB1_1: # %for.cond1.preheader + # =>This Loop Header: Depth=1 + # Child Loop BB1_2 Depth 2 + movq stdout(%rip), %rax + movq %r15, %r12 + xorl %ebx, %ebx + .align 16, 0x90 +.LBB1_2: # %for.body3 + # Parent Loop BB1_1 Depth=1 + # => This Inner Loop Header: Depth=2 + vmovss (%r12), %xmm0 + vcvtss2sd %xmm0, %xmm0, %xmm0 + movq %rax, %rdi + movl $.L.str, %esi + movb $1, %al + callq fprintf + movslq %ebx, %rax + imulq $1717986919, %rax, %rcx # imm = 0x66666667 + movq %rcx, %rdx + shrq $63, %rdx + sarq $37, %rcx + addl %edx, %ecx + imull $80, %ecx, %ecx + subl %ecx, %eax + cmpl $79, %eax + jne .LBB1_4 +# BB#3: # %if.then + # in Loop: Header=BB1_2 Depth=2 + movq stdout(%rip), %rsi + movl $10, %edi + callq fputc +.LBB1_4: # %for.inc + # in Loop: Header=BB1_2 Depth=2 + addq $4, %r12 + incq %rbx + movq stdout(%rip), %rax + cmpq $1536, %rbx # imm = 0x600 + jne .LBB1_2 +# BB#5: # %for.end + # in Loop: Header=BB1_1 Depth=1 + movl $10, %edi + movq %rax, %rsi + callq fputc + addq $6144, %r15 # imm = 0x1800 + incq %r14 + cmpq $1536, %r14 # imm = 0x600 + jne .LBB1_1 +# BB#6: # %for.end12 + popq %rbx + popq %r12 + popq %r14 + popq %r15 + popq %rbp + ret +.Ltmp20: + .size print_array, .Ltmp20-print_array + .cfi_endproc + + .globl main + .align 16, 0x90 + .type main,@function +main: # @main + .cfi_startproc +# BB#0: # %entry + pushq %rbp +.Ltmp24: + .cfi_def_cfa_offset 16 +.Ltmp25: + .cfi_offset %rbp, -16 + movq %rsp, %rbp +.Ltmp26: + .cfi_def_cfa_register %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + subq $24, %rsp +.Ltmp27: + .cfi_offset %rbx, -56 +.Ltmp28: + .cfi_offset %r12, -48 +.Ltmp29: + .cfi_offset %r13, -40 +.Ltmp30: + .cfi_offset %r14, -32 +.Ltmp31: + .cfi_offset %r15, -24 + callq init_array + leaq -48(%rbp), %rsi + movl $main.omp_subfn, %edi + xorl %edx, %edx + xorl %ecx, %ecx + movl $1536, %r8d # imm = 0x600 + movl $1, %r9d + callq GOMP_parallel_loop_runtime_start + leaq -56(%rbp), %rdi + leaq -64(%rbp), %rsi + callq GOMP_loop_runtime_next + testb %al, %al + je .LBB2_4 +# BB#1: + leaq -56(%rbp), %r14 + leaq -64(%rbp), %r15 + .align 16, 0x90 +.LBB2_2: # %omp.loadIVBounds.i + # =>This Loop Header: Depth=1 + # Child Loop BB2_6 Depth 2 + movq -64(%rbp), %r12 + leaq -1(%r12), %rcx + movq -56(%rbp), %rax + cmpq %rcx, %rax + jg .LBB2_3 +# BB#5: # %polly.loop_preheader4.preheader.i + # in Loop: Header=BB2_2 Depth=1 + addq $-2, %r12 + leaq (%rax,%rax,2), %rcx + leaq -1(%rax), %r13 + shlq $11, %rcx + leaq C(%rcx), %rbx + .align 16, 0x90 +.LBB2_6: # %polly.loop_preheader4.i + # Parent Loop BB2_2 Depth=1 + # => This Inner Loop Header: Depth=2 + movq %rbx, %rdi + xorl %esi, %esi + movl $6144, %edx # imm = 0x1800 + callq memset + addq $6144, %rbx # imm = 0x1800 + incq %r13 + cmpq %r12, %r13 + jle .LBB2_6 +.LBB2_3: # %omp.checkNext.backedge.i + # in Loop: Header=BB2_2 Depth=1 + movq %r14, %rdi + movq %r15, %rsi + callq GOMP_loop_runtime_next + testb %al, %al + jne .LBB2_2 +.LBB2_4: # %main.omp_subfn.exit + callq GOMP_loop_end_nowait + callq GOMP_parallel_end + leaq -48(%rbp), %rbx + movl $main.omp_subfn1, %edi + movq %rbx, %rsi + xorl %edx, %edx + xorl %ecx, %ecx + movl $1536, %r8d # imm = 0x600 + movl $64, %r9d + callq GOMP_parallel_loop_runtime_start + movq %rbx, %rdi + callq main.omp_subfn1 + callq GOMP_parallel_end + xorl %eax, %eax + addq $24, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + ret +.Ltmp32: + .size main, .Ltmp32-main + .cfi_endproc + + .section .rodata.cst8,"aM",@progbits,8 + .align 8 +.LCPI3_0: + .quad 4602678819172646912 # double 0.5 + .text + .align 16, 0x90 + .type init_array.omp_subfn,@function +init_array.omp_subfn: # @init_array.omp_subfn + .cfi_startproc +# BB#0: # %omp.setup + pushq %rbp +.Ltmp36: + .cfi_def_cfa_offset 16 +.Ltmp37: + .cfi_offset %rbp, -16 + movq %rsp, %rbp +.Ltmp38: + .cfi_def_cfa_register %rbp + pushq %r15 + pushq %r14 + pushq %rbx + subq $24, %rsp +.Ltmp39: + .cfi_offset %rbx, -40 +.Ltmp40: + .cfi_offset %r14, -32 +.Ltmp41: + .cfi_offset %r15, -24 + leaq -32(%rbp), %rdi + leaq -40(%rbp), %rsi + callq GOMP_loop_runtime_next + testb %al, %al + je .LBB3_4 +# BB#1: + leaq -32(%rbp), %r14 + leaq -40(%rbp), %r15 + vmovsd .LCPI3_0(%rip), %xmm1 + .align 16, 0x90 +.LBB3_2: # %omp.loadIVBounds + # =>This Loop Header: Depth=1 + # Child Loop BB3_8 Depth 2 + # Child Loop BB3_5 Depth 3 + movq -40(%rbp), %r8 + leaq -1(%r8), %rcx + movq -32(%rbp), %rax + cmpq %rcx, %rax + jg .LBB3_3 +# BB#7: # %polly.loop_preheader4.preheader + # in Loop: Header=BB3_2 Depth=1 + addq $-2, %r8 + .align 16, 0x90 +.LBB3_8: # %polly.loop_preheader4 + # Parent Loop BB3_2 Depth=1 + # => This Loop Header: Depth=2 + # Child Loop BB3_5 Depth 3 + xorl %edx, %edx + .align 16, 0x90 +.LBB3_5: # %polly.loop_header3 + # Parent Loop BB3_2 Depth=1 + # Parent Loop BB3_8 Depth=2 + # => This Inner Loop Header: Depth=3 + movl %edx, %esi + imull %eax, %esi + movl %esi, %edi + sarl $31, %edi + shrl $22, %edi + addl %esi, %edi + andl $-1024, %edi # imm = 0xFFFFFFFFFFFFFC00 + negl %edi + movq %rax, %rcx + shlq $11, %rcx + leal 1(%rsi,%rdi), %ebx + leaq (%rcx,%rcx,2), %rdi + leaq 1(%rdx), %rsi + cmpq $1536, %rsi # imm = 0x600 + vcvtsi2sdl %ebx, %xmm0, %xmm0 + vmulsd %xmm1, %xmm0, %xmm0 + vcvtsd2ss %xmm0, %xmm0, %xmm0 + vmovss %xmm0, A(%rdi,%rdx,4) + vmovss %xmm0, B(%rdi,%rdx,4) + movq %rsi, %rdx + jne .LBB3_5 +# BB#6: # %polly.loop_exit5 + # in Loop: Header=BB3_8 Depth=2 + cmpq %r8, %rax + leaq 1(%rax), %rax + jle .LBB3_8 +.LBB3_3: # %omp.checkNext.backedge + # in Loop: Header=BB3_2 Depth=1 + movq %r14, %rdi + movq %r15, %rsi + callq GOMP_loop_runtime_next + vmovsd .LCPI3_0(%rip), %xmm1 + testb %al, %al + jne .LBB3_2 +.LBB3_4: # %omp.exit + callq GOMP_loop_end_nowait + addq $24, %rsp + popq %rbx + popq %r14 + popq %r15 + popq %rbp + ret +.Ltmp42: + .size init_array.omp_subfn, .Ltmp42-init_array.omp_subfn + .cfi_endproc + + .align 16, 0x90 + .type main.omp_subfn,@function +main.omp_subfn: # @main.omp_subfn + .cfi_startproc +# BB#0: # %omp.setup + pushq %rbp +.Ltmp46: + .cfi_def_cfa_offset 16 +.Ltmp47: + .cfi_offset %rbp, -16 + movq %rsp, %rbp +.Ltmp48: + .cfi_def_cfa_register %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + subq $24, %rsp +.Ltmp49: + .cfi_offset %rbx, -56 +.Ltmp50: + .cfi_offset %r12, -48 +.Ltmp51: + .cfi_offset %r13, -40 +.Ltmp52: + .cfi_offset %r14, -32 +.Ltmp53: + .cfi_offset %r15, -24 + leaq -48(%rbp), %rdi + leaq -56(%rbp), %rsi + callq GOMP_loop_runtime_next + testb %al, %al + je .LBB4_4 +# BB#1: + leaq -48(%rbp), %r14 + leaq -56(%rbp), %r15 + .align 16, 0x90 +.LBB4_2: # %omp.loadIVBounds + # =>This Loop Header: Depth=1 + # Child Loop BB4_6 Depth 2 + movq -56(%rbp), %r12 + leaq -1(%r12), %rcx + movq -48(%rbp), %rax + cmpq %rcx, %rax + jg .LBB4_3 +# BB#5: # %polly.loop_preheader4.preheader + # in Loop: Header=BB4_2 Depth=1 + addq $-2, %r12 + leaq (%rax,%rax,2), %rcx + leaq -1(%rax), %r13 + shlq $11, %rcx + leaq C(%rcx), %rbx + .align 16, 0x90 +.LBB4_6: # %polly.loop_preheader4 + # Parent Loop BB4_2 Depth=1 + # => This Inner Loop Header: Depth=2 + movq %rbx, %rdi + xorl %esi, %esi + movl $6144, %edx # imm = 0x1800 + callq memset + addq $6144, %rbx # imm = 0x1800 + incq %r13 + cmpq %r12, %r13 + jle .LBB4_6 +.LBB4_3: # %omp.checkNext.backedge + # in Loop: Header=BB4_2 Depth=1 + movq %r14, %rdi + movq %r15, %rsi + callq GOMP_loop_runtime_next + testb %al, %al + jne .LBB4_2 +.LBB4_4: # %omp.exit + callq GOMP_loop_end_nowait + addq $24, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + ret +.Ltmp54: + .size main.omp_subfn, .Ltmp54-main.omp_subfn + .cfi_endproc + + .align 16, 0x90 + .type main.omp_subfn1,@function +main.omp_subfn1: # @main.omp_subfn1 + .cfi_startproc +# BB#0: # %omp.setup + pushq %rbp +.Ltmp58: + .cfi_def_cfa_offset 16 +.Ltmp59: + .cfi_offset %rbp, -16 + movq %rsp, %rbp +.Ltmp60: + .cfi_def_cfa_register %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + subq $72, %rsp +.Ltmp61: + .cfi_offset %rbx, -56 +.Ltmp62: + .cfi_offset %r12, -48 +.Ltmp63: + .cfi_offset %r13, -40 +.Ltmp64: + .cfi_offset %r14, -32 +.Ltmp65: + .cfi_offset %r15, -24 + jmp .LBB5_1 + .align 16, 0x90 +.LBB5_2: # %omp.loadIVBounds + # in Loop: Header=BB5_1 Depth=1 + movq -56(%rbp), %rax + movq %rax, -112(%rbp) # 8-byte Spill + leaq -1(%rax), %rax + movq -48(%rbp), %rcx + cmpq %rax, %rcx + jg .LBB5_1 +# BB#3: # %polly.loop_preheader4.preheader + # in Loop: Header=BB5_1 Depth=1 + leaq -1(%rcx), %rax + movq %rax, -88(%rbp) # 8-byte Spill + addq $-65, -112(%rbp) # 8-byte Folded Spill + movq %rcx, %rax + shlq $9, %rax + leaq (%rax,%rax,2), %rax + leaq C+16(,%rax,4), %rax + movq %rax, -104(%rbp) # 8-byte Spill + .align 16, 0x90 +.LBB5_7: # %polly.loop_preheader4 + # Parent Loop BB5_1 Depth=1 + # => This Loop Header: Depth=2 + # Child Loop BB5_8 Depth 3 + # Child Loop BB5_9 Depth 4 + # Child Loop BB5_12 Depth 5 + # Child Loop BB5_17 Depth 6 + # Child Loop BB5_18 Depth 7 + # Child Loop BB5_14 Depth 5 + movq %rcx, -72(%rbp) # 8-byte Spill + leaq 62(%rcx), %rdi + xorl %edx, %edx + .align 16, 0x90 +.LBB5_8: # %polly.loop_preheader11 + # Parent Loop BB5_1 Depth=1 + # Parent Loop BB5_7 Depth=2 + # => This Loop Header: Depth=3 + # Child Loop BB5_9 Depth 4 + # Child Loop BB5_12 Depth 5 + # Child Loop BB5_17 Depth 6 + # Child Loop BB5_18 Depth 7 + # Child Loop BB5_14 Depth 5 + movq %rdx, -96(%rbp) # 8-byte Spill + leaq -4(%rdx), %rcx + movq %rdx, %rax + decq %rax + cmovsq %rcx, %rax + movq %rax, %r14 + sarq $63, %r14 + shrq $62, %r14 + addq %rax, %r14 + andq $-4, %r14 + movq %rdx, %rax + orq $63, %rax + leaq -4(%rax), %rdx + movq -104(%rbp), %rcx # 8-byte Reload + leaq (%rcx,%r14,4), %rcx + movq %rcx, -80(%rbp) # 8-byte Spill + leaq B+16(,%r14,4), %rbx + leaq 4(%r14), %rcx + movq %rcx, -64(%rbp) # 8-byte Spill + xorl %r11d, %r11d + .align 16, 0x90 +.LBB5_9: # %polly.loop_header10 + # Parent Loop BB5_1 Depth=1 + # Parent Loop BB5_7 Depth=2 + # Parent Loop BB5_8 Depth=3 + # => This Loop Header: Depth=4 + # Child Loop BB5_12 Depth 5 + # Child Loop BB5_17 Depth 6 + # Child Loop BB5_18 Depth 7 + # Child Loop BB5_14 Depth 5 + movabsq $9223372036854775744, %rcx # imm = 0x7FFFFFFFFFFFFFC0 + cmpq %rcx, -72(%rbp) # 8-byte Folded Reload + jg .LBB5_15 +# BB#10: # %polly.loop_header17.preheader + # in Loop: Header=BB5_9 Depth=4 + movq %r11, %r15 + orq $63, %r15 + cmpq %r15, %r11 + movq -88(%rbp), %rcx # 8-byte Reload + jle .LBB5_11 + .align 16, 0x90 +.LBB5_14: # %polly.loop_exit28.us + # Parent Loop BB5_1 Depth=1 + # Parent Loop BB5_7 Depth=2 + # Parent Loop BB5_8 Depth=3 + # Parent Loop BB5_9 Depth=4 + # => This Inner Loop Header: Depth=5 + incq %rcx + cmpq %rdi, %rcx + jle .LBB5_14 + jmp .LBB5_15 + .align 16, 0x90 +.LBB5_11: # in Loop: Header=BB5_9 Depth=4 + decq %r15 + movq -80(%rbp), %r13 # 8-byte Reload + movq -72(%rbp), %rcx # 8-byte Reload + .align 16, 0x90 +.LBB5_12: # %polly.loop_header26.preheader + # Parent Loop BB5_1 Depth=1 + # Parent Loop BB5_7 Depth=2 + # Parent Loop BB5_8 Depth=3 + # Parent Loop BB5_9 Depth=4 + # => This Loop Header: Depth=5 + # Child Loop BB5_17 Depth 6 + # Child Loop BB5_18 Depth 7 + cmpq %rax, -64(%rbp) # 8-byte Folded Reload + movq %rbx, %r12 + movq %r11, %r8 + jg .LBB5_13 + .align 16, 0x90 +.LBB5_17: # %polly.loop_header35.preheader + # Parent Loop BB5_1 Depth=1 + # Parent Loop BB5_7 Depth=2 + # Parent Loop BB5_8 Depth=3 + # Parent Loop BB5_9 Depth=4 + # Parent Loop BB5_12 Depth=5 + # => This Loop Header: Depth=6 + # Child Loop BB5_18 Depth 7 + leaq (%rcx,%rcx,2), %rsi + shlq $11, %rsi + vbroadcastss A(%rsi,%r8,4), %xmm0 + movq %r13, %r9 + movq %r12, %r10 + movq %r14, %rsi +.LBB5_18: # %polly.loop_header35 + # Parent Loop BB5_1 Depth=1 + # Parent Loop BB5_7 Depth=2 + # Parent Loop BB5_8 Depth=3 + # Parent Loop BB5_9 Depth=4 + # Parent Loop BB5_12 Depth=5 + # Parent Loop BB5_17 Depth=6 + # => This Inner Loop Header: Depth=7 + vmulps (%r10), %xmm0, %xmm1 + vaddps (%r9), %xmm1, %xmm1 + vmovaps %xmm1, (%r9) + addq $16, %r9 + addq $16, %r10 + addq $4, %rsi + cmpq %rdx, %rsi + jle .LBB5_18 +# BB#16: # %polly.loop_exit37 + # in Loop: Header=BB5_17 Depth=6 + addq $6144, %r12 # imm = 0x1800 + cmpq %r15, %r8 + leaq 1(%r8), %r8 + jle .LBB5_17 + .align 16, 0x90 +.LBB5_13: # %polly.loop_exit28 + # in Loop: Header=BB5_12 Depth=5 + addq $6144, %r13 # imm = 0x1800 + cmpq %rdi, %rcx + leaq 1(%rcx), %rcx + jle .LBB5_12 + .align 16, 0x90 +.LBB5_15: # %polly.loop_exit19 + # in Loop: Header=BB5_9 Depth=4 + addq $393216, %rbx # imm = 0x60000 + cmpq $1472, %r11 # imm = 0x5C0 + leaq 64(%r11), %r11 + jl .LBB5_9 +# BB#5: # %polly.loop_exit12 + # in Loop: Header=BB5_8 Depth=3 + movq -96(%rbp), %rdx # 8-byte Reload + cmpq $1472, %rdx # imm = 0x5C0 + leaq 64(%rdx), %rdx + jl .LBB5_8 +# BB#6: # %polly.loop_exit5 + # in Loop: Header=BB5_7 Depth=2 + addq $64, -88(%rbp) # 8-byte Folded Spill + addq $393216, -104(%rbp) # 8-byte Folded Spill + # imm = 0x60000 + movq -72(%rbp), %rcx # 8-byte Reload + cmpq -112(%rbp), %rcx # 8-byte Folded Reload + leaq 64(%rcx), %rcx + jle .LBB5_7 +.LBB5_1: # %omp.setup + # =>This Loop Header: Depth=1 + # Child Loop BB5_7 Depth 2 + # Child Loop BB5_8 Depth 3 + # Child Loop BB5_9 Depth 4 + # Child Loop BB5_12 Depth 5 + # Child Loop BB5_17 Depth 6 + # Child Loop BB5_18 Depth 7 + # Child Loop BB5_14 Depth 5 + leaq -48(%rbp), %rdi + leaq -56(%rbp), %rsi + callq GOMP_loop_runtime_next + testb %al, %al + jne .LBB5_2 +# BB#4: # %omp.exit + callq GOMP_loop_end_nowait + addq $72, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + ret +.Ltmp66: + .size main.omp_subfn1, .Ltmp66-main.omp_subfn1 + .cfi_endproc + + .type A,@object # @A + .comm A,9437184,16 + .type B,@object # @B + .comm B,9437184,16 + .type .L.str,@object # @.str + .section .rodata.str1.1,"aMS",@progbits,1 +.L.str: + .asciz "%lf " + .size .L.str, 5 + + .type C,@object # @C + .comm C,9437184,16 + + .section ".note.GNU-stack","",@progbits diff --git a/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector.exe b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector.exe Binary files differnew file mode 100755 index 000000000000..36b788ea9ac3 --- /dev/null +++ b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector.exe diff --git a/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector.ll b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector.ll Binary files differnew file mode 100644 index 000000000000..9d1f9ad098f9 --- /dev/null +++ b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector.ll diff --git a/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector.s b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector.s new file mode 100644 index 000000000000..485d230bc398 --- /dev/null +++ b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector.s @@ -0,0 +1,396 @@ + .file "matmul.polly.interchanged+tiled+vector.ll" + .section .rodata.cst8,"aM",@progbits,8 + .align 8 +.LCPI0_0: + .quad 4602678819172646912 # double 0.5 + .text + .globl init_array + .align 16, 0x90 + .type init_array,@function +init_array: # @init_array + .cfi_startproc +# BB#0: # %entry + pushq %rbp +.Ltmp2: + .cfi_def_cfa_offset 16 +.Ltmp3: + .cfi_offset %rbp, -16 + movq %rsp, %rbp +.Ltmp4: + .cfi_def_cfa_register %rbp + xorl %r8d, %r8d + vmovsd .LCPI0_0(%rip), %xmm0 + .align 16, 0x90 +.LBB0_1: # %polly.loop_preheader3 + # =>This Loop Header: Depth=1 + # Child Loop BB0_2 Depth 2 + xorl %ecx, %ecx + .align 16, 0x90 +.LBB0_2: # %polly.loop_header2 + # Parent Loop BB0_1 Depth=1 + # => This Inner Loop Header: Depth=2 + movl %ecx, %edx + imull %r8d, %edx + movl %edx, %esi + sarl $31, %esi + shrl $22, %esi + addl %edx, %esi + andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00 + negl %esi + movq %r8, %rax + shlq $11, %rax + leal 1(%rdx,%rsi), %edi + leaq (%rax,%rax,2), %rsi + leaq 1(%rcx), %rdx + cmpq $1536, %rdx # imm = 0x600 + vcvtsi2sdl %edi, %xmm0, %xmm1 + vmulsd %xmm0, %xmm1, %xmm1 + vcvtsd2ss %xmm1, %xmm1, %xmm1 + vmovss %xmm1, A(%rsi,%rcx,4) + vmovss %xmm1, B(%rsi,%rcx,4) + movq %rdx, %rcx + jne .LBB0_2 +# BB#3: # %polly.loop_exit4 + # in Loop: Header=BB0_1 Depth=1 + incq %r8 + cmpq $1536, %r8 # imm = 0x600 + jne .LBB0_1 +# BB#4: # %polly.loop_exit + popq %rbp + ret +.Ltmp5: + .size init_array, .Ltmp5-init_array + .cfi_endproc + + .globl print_array + .align 16, 0x90 + .type print_array,@function +print_array: # @print_array + .cfi_startproc +# BB#0: # %entry + pushq %rbp +.Ltmp9: + .cfi_def_cfa_offset 16 +.Ltmp10: + .cfi_offset %rbp, -16 + movq %rsp, %rbp +.Ltmp11: + .cfi_def_cfa_register %rbp + pushq %r15 + pushq %r14 + pushq %r12 + pushq %rbx +.Ltmp12: + .cfi_offset %rbx, -48 +.Ltmp13: + .cfi_offset %r12, -40 +.Ltmp14: + .cfi_offset %r14, -32 +.Ltmp15: + .cfi_offset %r15, -24 + xorl %r14d, %r14d + movl $C, %r15d + .align 16, 0x90 +.LBB1_1: # %for.cond1.preheader + # =>This Loop Header: Depth=1 + # Child Loop BB1_2 Depth 2 + movq stdout(%rip), %rax + movq %r15, %r12 + xorl %ebx, %ebx + .align 16, 0x90 +.LBB1_2: # %for.body3 + # Parent Loop BB1_1 Depth=1 + # => This Inner Loop Header: Depth=2 + vmovss (%r12), %xmm0 + vcvtss2sd %xmm0, %xmm0, %xmm0 + movq %rax, %rdi + movl $.L.str, %esi + movb $1, %al + callq fprintf + movslq %ebx, %rax + imulq $1717986919, %rax, %rcx # imm = 0x66666667 + movq %rcx, %rdx + shrq $63, %rdx + sarq $37, %rcx + addl %edx, %ecx + imull $80, %ecx, %ecx + subl %ecx, %eax + cmpl $79, %eax + jne .LBB1_4 +# BB#3: # %if.then + # in Loop: Header=BB1_2 Depth=2 + movq stdout(%rip), %rsi + movl $10, %edi + callq fputc +.LBB1_4: # %for.inc + # in Loop: Header=BB1_2 Depth=2 + addq $4, %r12 + incq %rbx + movq stdout(%rip), %rax + cmpq $1536, %rbx # imm = 0x600 + jne .LBB1_2 +# BB#5: # %for.end + # in Loop: Header=BB1_1 Depth=1 + movl $10, %edi + movq %rax, %rsi + callq fputc + addq $6144, %r15 # imm = 0x1800 + incq %r14 + cmpq $1536, %r14 # imm = 0x600 + jne .LBB1_1 +# BB#6: # %for.end12 + popq %rbx + popq %r12 + popq %r14 + popq %r15 + popq %rbp + ret +.Ltmp16: + .size print_array, .Ltmp16-print_array + .cfi_endproc + + .section .rodata.cst8,"aM",@progbits,8 + .align 8 +.LCPI2_0: + .quad 4602678819172646912 # double 0.5 + .text + .globl main + .align 16, 0x90 + .type main,@function +main: # @main + .cfi_startproc +# BB#0: # %entry + pushq %rbp +.Ltmp20: + .cfi_def_cfa_offset 16 +.Ltmp21: + .cfi_offset %rbp, -16 + movq %rsp, %rbp +.Ltmp22: + .cfi_def_cfa_register %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + subq $56, %rsp +.Ltmp23: + .cfi_offset %rbx, -56 +.Ltmp24: + .cfi_offset %r12, -48 +.Ltmp25: + .cfi_offset %r13, -40 +.Ltmp26: + .cfi_offset %r14, -32 +.Ltmp27: + .cfi_offset %r15, -24 + xorl %ebx, %ebx + vmovsd .LCPI2_0(%rip), %xmm0 + .align 16, 0x90 +.LBB2_1: # %polly.loop_preheader3.i + # =>This Loop Header: Depth=1 + # Child Loop BB2_2 Depth 2 + xorl %ecx, %ecx + .align 16, 0x90 +.LBB2_2: # %polly.loop_header2.i + # Parent Loop BB2_1 Depth=1 + # => This Inner Loop Header: Depth=2 + movl %ecx, %edx + imull %ebx, %edx + movl %edx, %esi + sarl $31, %esi + shrl $22, %esi + addl %edx, %esi + andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00 + negl %esi + movq %rbx, %rax + shlq $11, %rax + leal 1(%rdx,%rsi), %edi + leaq (%rax,%rax,2), %rsi + leaq 1(%rcx), %rdx + cmpq $1536, %rdx # imm = 0x600 + vcvtsi2sdl %edi, %xmm0, %xmm1 + vmulsd %xmm0, %xmm1, %xmm1 + vcvtsd2ss %xmm1, %xmm1, %xmm1 + vmovss %xmm1, A(%rsi,%rcx,4) + vmovss %xmm1, B(%rsi,%rcx,4) + movq %rdx, %rcx + jne .LBB2_2 +# BB#3: # %polly.loop_exit4.i + # in Loop: Header=BB2_1 Depth=1 + incq %rbx + cmpq $1536, %rbx # imm = 0x600 + jne .LBB2_1 +# BB#4: # %polly.loop_preheader3.preheader + movl $C, %edi + xorl %esi, %esi + movl $9437184, %edx # imm = 0x900000 + callq memset + xorl %esi, %esi + movl $C+16, %eax + movq %rax, -88(%rbp) # 8-byte Spill + .align 16, 0x90 +.LBB2_5: # %polly.loop_preheader17 + # =>This Loop Header: Depth=1 + # Child Loop BB2_15 Depth 2 + # Child Loop BB2_8 Depth 3 + # Child Loop BB2_11 Depth 4 + # Child Loop BB2_17 Depth 5 + # Child Loop BB2_18 Depth 6 + movq %rsi, -56(%rbp) # 8-byte Spill + movq %rsi, %rax + orq $63, %rax + movq %rax, -72(%rbp) # 8-byte Spill + leaq -1(%rax), %rax + movq %rax, -48(%rbp) # 8-byte Spill + xorl %edx, %edx + .align 16, 0x90 +.LBB2_15: # %polly.loop_preheader24 + # Parent Loop BB2_5 Depth=1 + # => This Loop Header: Depth=2 + # Child Loop BB2_8 Depth 3 + # Child Loop BB2_11 Depth 4 + # Child Loop BB2_17 Depth 5 + # Child Loop BB2_18 Depth 6 + movq %rdx, -80(%rbp) # 8-byte Spill + leaq -4(%rdx), %rcx + movq %rdx, %rax + decq %rax + cmovsq %rcx, %rax + movq %rax, %r15 + sarq $63, %r15 + shrq $62, %r15 + addq %rax, %r15 + andq $-4, %r15 + movq %rdx, %r13 + orq $63, %r13 + leaq -4(%r13), %rdx + xorl %r10d, %r10d + movq -88(%rbp), %rax # 8-byte Reload + leaq (%rax,%r15,4), %rax + movq %rax, -64(%rbp) # 8-byte Spill + leaq B+16(,%r15,4), %rbx + leaq 4(%r15), %r12 + .align 16, 0x90 +.LBB2_8: # %polly.loop_header23 + # Parent Loop BB2_5 Depth=1 + # Parent Loop BB2_15 Depth=2 + # => This Loop Header: Depth=3 + # Child Loop BB2_11 Depth 4 + # Child Loop BB2_17 Depth 5 + # Child Loop BB2_18 Depth 6 + cmpq -72(%rbp), %rsi # 8-byte Folded Reload + jg .LBB2_13 +# BB#9: # %polly.loop_header30.preheader + # in Loop: Header=BB2_8 Depth=3 + movq %r10, %rax + orq $63, %rax + cmpq %rax, %r10 + jg .LBB2_13 +# BB#10: # in Loop: Header=BB2_8 Depth=3 + decq %rax + movq -64(%rbp), %r14 # 8-byte Reload + movq -56(%rbp), %r11 # 8-byte Reload + .align 16, 0x90 +.LBB2_11: # %polly.loop_header37.preheader + # Parent Loop BB2_5 Depth=1 + # Parent Loop BB2_15 Depth=2 + # Parent Loop BB2_8 Depth=3 + # => This Loop Header: Depth=4 + # Child Loop BB2_17 Depth 5 + # Child Loop BB2_18 Depth 6 + cmpq %r13, %r12 + movq %rbx, %r8 + movq %r10, %rsi + jg .LBB2_12 + .align 16, 0x90 +.LBB2_17: # %polly.loop_header46.preheader + # Parent Loop BB2_5 Depth=1 + # Parent Loop BB2_15 Depth=2 + # Parent Loop BB2_8 Depth=3 + # Parent Loop BB2_11 Depth=4 + # => This Loop Header: Depth=5 + # Child Loop BB2_18 Depth 6 + leaq (%r11,%r11,2), %rcx + shlq $11, %rcx + vbroadcastss A(%rcx,%rsi,4), %xmm0 + movq %r14, %rdi + movq %r8, %r9 + movq %r15, %rcx +.LBB2_18: # %polly.loop_header46 + # Parent Loop BB2_5 Depth=1 + # Parent Loop BB2_15 Depth=2 + # Parent Loop BB2_8 Depth=3 + # Parent Loop BB2_11 Depth=4 + # Parent Loop BB2_17 Depth=5 + # => This Inner Loop Header: Depth=6 + vmulps (%r9), %xmm0, %xmm1 + vaddps (%rdi), %xmm1, %xmm1 + vmovaps %xmm1, (%rdi) + addq $16, %rdi + addq $16, %r9 + addq $4, %rcx + cmpq %rdx, %rcx + jle .LBB2_18 +# BB#16: # %polly.loop_exit48 + # in Loop: Header=BB2_17 Depth=5 + addq $6144, %r8 # imm = 0x1800 + cmpq %rax, %rsi + leaq 1(%rsi), %rsi + jle .LBB2_17 + .align 16, 0x90 +.LBB2_12: # %polly.loop_exit39 + # in Loop: Header=BB2_11 Depth=4 + addq $6144, %r14 # imm = 0x1800 + cmpq -48(%rbp), %r11 # 8-byte Folded Reload + leaq 1(%r11), %r11 + jle .LBB2_11 + .align 16, 0x90 +.LBB2_13: # %polly.loop_exit32 + # in Loop: Header=BB2_8 Depth=3 + addq $393216, %rbx # imm = 0x60000 + cmpq $1472, %r10 # imm = 0x5C0 + leaq 64(%r10), %r10 + movq -56(%rbp), %rsi # 8-byte Reload + jl .LBB2_8 +# BB#14: # %polly.loop_exit25 + # in Loop: Header=BB2_15 Depth=2 + movq -80(%rbp), %rdx # 8-byte Reload + cmpq $1472, %rdx # imm = 0x5C0 + leaq 64(%rdx), %rdx + jl .LBB2_15 +# BB#6: # %polly.loop_exit18 + # in Loop: Header=BB2_5 Depth=1 + addq $393216, -88(%rbp) # 8-byte Folded Spill + # imm = 0x60000 + cmpq $1472, %rsi # imm = 0x5C0 + leaq 64(%rsi), %rsi + jl .LBB2_5 +# BB#7: # %polly.loop_exit11 + xorl %eax, %eax + addq $56, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + ret +.Ltmp28: + .size main, .Ltmp28-main + .cfi_endproc + + .type A,@object # @A + .comm A,9437184,16 + .type B,@object # @B + .comm B,9437184,16 + .type .L.str,@object # @.str + .section .rodata.str1.1,"aMS",@progbits,1 +.L.str: + .asciz "%lf " + .size .L.str, 5 + + .type C,@object # @C + .comm C,9437184,16 + + .section ".note.GNU-stack","",@progbits diff --git a/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled.exe b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled.exe Binary files differnew file mode 100755 index 000000000000..fbd8b128fd88 --- /dev/null +++ b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled.exe diff --git a/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled.ll b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled.ll Binary files differnew file mode 100644 index 000000000000..acdd95f3bc4c --- /dev/null +++ b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled.ll diff --git a/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled.s b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled.s new file mode 100644 index 000000000000..f7ab7fdd59cc --- /dev/null +++ b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled.s @@ -0,0 +1,390 @@ + .file "matmul.polly.interchanged+tiled.ll" + .section .rodata.cst8,"aM",@progbits,8 + .align 8 +.LCPI0_0: + .quad 4602678819172646912 # double 0.5 + .text + .globl init_array + .align 16, 0x90 + .type init_array,@function +init_array: # @init_array + .cfi_startproc +# BB#0: # %entry + pushq %rbp +.Ltmp2: + .cfi_def_cfa_offset 16 +.Ltmp3: + .cfi_offset %rbp, -16 + movq %rsp, %rbp +.Ltmp4: + .cfi_def_cfa_register %rbp + xorl %r8d, %r8d + vmovsd .LCPI0_0(%rip), %xmm0 + .align 16, 0x90 +.LBB0_1: # %polly.loop_preheader3 + # =>This Loop Header: Depth=1 + # Child Loop BB0_2 Depth 2 + xorl %ecx, %ecx + .align 16, 0x90 +.LBB0_2: # %polly.loop_header2 + # Parent Loop BB0_1 Depth=1 + # => This Inner Loop Header: Depth=2 + movl %ecx, %edx + imull %r8d, %edx + movl %edx, %esi + sarl $31, %esi + shrl $22, %esi + addl %edx, %esi + andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00 + negl %esi + movq %r8, %rax + shlq $11, %rax + leal 1(%rdx,%rsi), %edi + leaq (%rax,%rax,2), %rsi + leaq 1(%rcx), %rdx + cmpq $1536, %rdx # imm = 0x600 + vcvtsi2sdl %edi, %xmm0, %xmm1 + vmulsd %xmm0, %xmm1, %xmm1 + vcvtsd2ss %xmm1, %xmm1, %xmm1 + vmovss %xmm1, A(%rsi,%rcx,4) + vmovss %xmm1, B(%rsi,%rcx,4) + movq %rdx, %rcx + jne .LBB0_2 +# BB#3: # %polly.loop_exit4 + # in Loop: Header=BB0_1 Depth=1 + incq %r8 + cmpq $1536, %r8 # imm = 0x600 + jne .LBB0_1 +# BB#4: # %polly.loop_exit + popq %rbp + ret +.Ltmp5: + .size init_array, .Ltmp5-init_array + .cfi_endproc + + .globl print_array + .align 16, 0x90 + .type print_array,@function +print_array: # @print_array + .cfi_startproc +# BB#0: # %entry + pushq %rbp +.Ltmp9: + .cfi_def_cfa_offset 16 +.Ltmp10: + .cfi_offset %rbp, -16 + movq %rsp, %rbp +.Ltmp11: + .cfi_def_cfa_register %rbp + pushq %r15 + pushq %r14 + pushq %r12 + pushq %rbx +.Ltmp12: + .cfi_offset %rbx, -48 +.Ltmp13: + .cfi_offset %r12, -40 +.Ltmp14: + .cfi_offset %r14, -32 +.Ltmp15: + .cfi_offset %r15, -24 + xorl %r14d, %r14d + movl $C, %r15d + .align 16, 0x90 +.LBB1_1: # %for.cond1.preheader + # =>This Loop Header: Depth=1 + # Child Loop BB1_2 Depth 2 + movq stdout(%rip), %rax + movq %r15, %r12 + xorl %ebx, %ebx + .align 16, 0x90 +.LBB1_2: # %for.body3 + # Parent Loop BB1_1 Depth=1 + # => This Inner Loop Header: Depth=2 + vmovss (%r12), %xmm0 + vcvtss2sd %xmm0, %xmm0, %xmm0 + movq %rax, %rdi + movl $.L.str, %esi + movb $1, %al + callq fprintf + movslq %ebx, %rax + imulq $1717986919, %rax, %rcx # imm = 0x66666667 + movq %rcx, %rdx + shrq $63, %rdx + sarq $37, %rcx + addl %edx, %ecx + imull $80, %ecx, %ecx + subl %ecx, %eax + cmpl $79, %eax + jne .LBB1_4 +# BB#3: # %if.then + # in Loop: Header=BB1_2 Depth=2 + movq stdout(%rip), %rsi + movl $10, %edi + callq fputc +.LBB1_4: # %for.inc + # in Loop: Header=BB1_2 Depth=2 + addq $4, %r12 + incq %rbx + movq stdout(%rip), %rax + cmpq $1536, %rbx # imm = 0x600 + jne .LBB1_2 +# BB#5: # %for.end + # in Loop: Header=BB1_1 Depth=1 + movl $10, %edi + movq %rax, %rsi + callq fputc + addq $6144, %r15 # imm = 0x1800 + incq %r14 + cmpq $1536, %r14 # imm = 0x600 + jne .LBB1_1 +# BB#6: # %for.end12 + popq %rbx + popq %r12 + popq %r14 + popq %r15 + popq %rbp + ret +.Ltmp16: + .size print_array, .Ltmp16-print_array + .cfi_endproc + + .section .rodata.cst8,"aM",@progbits,8 + .align 8 +.LCPI2_0: + .quad 4602678819172646912 # double 0.5 + .text + .globl main + .align 16, 0x90 + .type main,@function +main: # @main + .cfi_startproc +# BB#0: # %entry + pushq %rbp +.Ltmp20: + .cfi_def_cfa_offset 16 +.Ltmp21: + .cfi_offset %rbp, -16 + movq %rsp, %rbp +.Ltmp22: + .cfi_def_cfa_register %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + subq $56, %rsp +.Ltmp23: + .cfi_offset %rbx, -56 +.Ltmp24: + .cfi_offset %r12, -48 +.Ltmp25: + .cfi_offset %r13, -40 +.Ltmp26: + .cfi_offset %r14, -32 +.Ltmp27: + .cfi_offset %r15, -24 + xorl %ebx, %ebx + vmovsd .LCPI2_0(%rip), %xmm0 + .align 16, 0x90 +.LBB2_1: # %polly.loop_preheader3.i + # =>This Loop Header: Depth=1 + # Child Loop BB2_2 Depth 2 + xorl %ecx, %ecx + .align 16, 0x90 +.LBB2_2: # %polly.loop_header2.i + # Parent Loop BB2_1 Depth=1 + # => This Inner Loop Header: Depth=2 + movl %ecx, %edx + imull %ebx, %edx + movl %edx, %esi + sarl $31, %esi + shrl $22, %esi + addl %edx, %esi + andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00 + negl %esi + movq %rbx, %rax + shlq $11, %rax + leal 1(%rdx,%rsi), %edi + leaq (%rax,%rax,2), %rsi + leaq 1(%rcx), %rdx + cmpq $1536, %rdx # imm = 0x600 + vcvtsi2sdl %edi, %xmm0, %xmm1 + vmulsd %xmm0, %xmm1, %xmm1 + vcvtsd2ss %xmm1, %xmm1, %xmm1 + vmovss %xmm1, A(%rsi,%rcx,4) + vmovss %xmm1, B(%rsi,%rcx,4) + movq %rdx, %rcx + jne .LBB2_2 +# BB#3: # %polly.loop_exit4.i + # in Loop: Header=BB2_1 Depth=1 + incq %rbx + cmpq $1536, %rbx # imm = 0x600 + jne .LBB2_1 +# BB#4: # %polly.loop_preheader3.preheader + movl $C, %ebx + movl $C, %edi + xorl %esi, %esi + movl $9437184, %edx # imm = 0x900000 + callq memset + xorl %eax, %eax + .align 16, 0x90 +.LBB2_5: # %polly.loop_preheader17 + # =>This Loop Header: Depth=1 + # Child Loop BB2_15 Depth 2 + # Child Loop BB2_8 Depth 3 + # Child Loop BB2_11 Depth 4 + # Child Loop BB2_17 Depth 5 + # Child Loop BB2_18 Depth 6 + movq %rax, -56(%rbp) # 8-byte Spill + movq %rbx, -88(%rbp) # 8-byte Spill + movq %rax, %rcx + orq $63, %rcx + movq %rcx, -72(%rbp) # 8-byte Spill + leaq -1(%rcx), %rcx + movq %rcx, -48(%rbp) # 8-byte Spill + movq $-1, %r15 + movl $B, %ecx + movq %rbx, -64(%rbp) # 8-byte Spill + xorl %r12d, %r12d + .align 16, 0x90 +.LBB2_15: # %polly.loop_preheader24 + # Parent Loop BB2_5 Depth=1 + # => This Loop Header: Depth=2 + # Child Loop BB2_8 Depth 3 + # Child Loop BB2_11 Depth 4 + # Child Loop BB2_17 Depth 5 + # Child Loop BB2_18 Depth 6 + movq %rcx, -80(%rbp) # 8-byte Spill + movq %r12, %r13 + orq $63, %r13 + leaq -1(%r13), %rbx + xorl %r9d, %r9d + movq %rcx, %rdx + .align 16, 0x90 +.LBB2_8: # %polly.loop_header23 + # Parent Loop BB2_5 Depth=1 + # Parent Loop BB2_15 Depth=2 + # => This Loop Header: Depth=3 + # Child Loop BB2_11 Depth 4 + # Child Loop BB2_17 Depth 5 + # Child Loop BB2_18 Depth 6 + cmpq -72(%rbp), %rax # 8-byte Folded Reload + jg .LBB2_13 +# BB#9: # %polly.loop_header30.preheader + # in Loop: Header=BB2_8 Depth=3 + movq %r9, %rax + orq $63, %rax + cmpq %rax, %r9 + jg .LBB2_13 +# BB#10: # in Loop: Header=BB2_8 Depth=3 + decq %rax + movq -64(%rbp), %r10 # 8-byte Reload + movq -56(%rbp), %r11 # 8-byte Reload + .align 16, 0x90 +.LBB2_11: # %polly.loop_header37.preheader + # Parent Loop BB2_5 Depth=1 + # Parent Loop BB2_15 Depth=2 + # Parent Loop BB2_8 Depth=3 + # => This Loop Header: Depth=4 + # Child Loop BB2_17 Depth 5 + # Child Loop BB2_18 Depth 6 + cmpq %r13, %r12 + movq %rdx, %r14 + movq %r9, %rcx + jg .LBB2_12 + .align 16, 0x90 +.LBB2_17: # %polly.loop_header46.preheader + # Parent Loop BB2_5 Depth=1 + # Parent Loop BB2_15 Depth=2 + # Parent Loop BB2_8 Depth=3 + # Parent Loop BB2_11 Depth=4 + # => This Loop Header: Depth=5 + # Child Loop BB2_18 Depth 6 + leaq (%r11,%r11,2), %rsi + shlq $11, %rsi + vmovss A(%rsi,%rcx,4), %xmm0 + movq %r10, %rdi + movq %r14, %r8 + movq %r15, %rsi +.LBB2_18: # %polly.loop_header46 + # Parent Loop BB2_5 Depth=1 + # Parent Loop BB2_15 Depth=2 + # Parent Loop BB2_8 Depth=3 + # Parent Loop BB2_11 Depth=4 + # Parent Loop BB2_17 Depth=5 + # => This Inner Loop Header: Depth=6 + vmulss (%r8), %xmm0, %xmm1 + vaddss (%rdi), %xmm1, %xmm1 + vmovss %xmm1, (%rdi) + addq $4, %rdi + addq $4, %r8 + incq %rsi + cmpq %rbx, %rsi + jle .LBB2_18 +# BB#16: # %polly.loop_exit48 + # in Loop: Header=BB2_17 Depth=5 + addq $6144, %r14 # imm = 0x1800 + cmpq %rax, %rcx + leaq 1(%rcx), %rcx + jle .LBB2_17 + .align 16, 0x90 +.LBB2_12: # %polly.loop_exit39 + # in Loop: Header=BB2_11 Depth=4 + addq $6144, %r10 # imm = 0x1800 + cmpq -48(%rbp), %r11 # 8-byte Folded Reload + leaq 1(%r11), %r11 + jle .LBB2_11 + .align 16, 0x90 +.LBB2_13: # %polly.loop_exit32 + # in Loop: Header=BB2_8 Depth=3 + addq $393216, %rdx # imm = 0x60000 + cmpq $1472, %r9 # imm = 0x5C0 + leaq 64(%r9), %r9 + movq -56(%rbp), %rax # 8-byte Reload + jl .LBB2_8 +# BB#14: # %polly.loop_exit25 + # in Loop: Header=BB2_15 Depth=2 + addq $256, -64(%rbp) # 8-byte Folded Spill + # imm = 0x100 + movq -80(%rbp), %rcx # 8-byte Reload + addq $256, %rcx # imm = 0x100 + addq $64, %r15 + cmpq $1472, %r12 # imm = 0x5C0 + leaq 64(%r12), %r12 + jl .LBB2_15 +# BB#6: # %polly.loop_exit18 + # in Loop: Header=BB2_5 Depth=1 + movq -88(%rbp), %rbx # 8-byte Reload + addq $393216, %rbx # imm = 0x60000 + cmpq $1472, %rax # imm = 0x5C0 + leaq 64(%rax), %rax + jl .LBB2_5 +# BB#7: # %polly.loop_exit11 + xorl %eax, %eax + addq $56, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbp + ret +.Ltmp28: + .size main, .Ltmp28-main + .cfi_endproc + + .type A,@object # @A + .comm A,9437184,16 + .type B,@object # @B + .comm B,9437184,16 + .type .L.str,@object # @.str + .section .rodata.str1.1,"aMS",@progbits,1 +.L.str: + .asciz "%lf " + .size .L.str, 5 + + .type C,@object # @C + .comm C,9437184,16 + + .section ".note.GNU-stack","",@progbits diff --git a/polly/docs/experiments/matmul/matmul.polly.interchanged.exe b/polly/docs/experiments/matmul/matmul.polly.interchanged.exe Binary files differnew file mode 100755 index 000000000000..240c95a7f790 --- /dev/null +++ b/polly/docs/experiments/matmul/matmul.polly.interchanged.exe diff --git a/polly/docs/experiments/matmul/matmul.polly.interchanged.ll b/polly/docs/experiments/matmul/matmul.polly.interchanged.ll Binary files differnew file mode 100644 index 000000000000..52fbccc7ed5c --- /dev/null +++ b/polly/docs/experiments/matmul/matmul.polly.interchanged.ll diff --git a/polly/docs/experiments/matmul/matmul.polly.interchanged.s b/polly/docs/experiments/matmul/matmul.polly.interchanged.s new file mode 100644 index 000000000000..a764da0b3f22 --- /dev/null +++ b/polly/docs/experiments/matmul/matmul.polly.interchanged.s @@ -0,0 +1,286 @@ + .file "matmul.polly.interchanged.ll" + .section .rodata.cst8,"aM",@progbits,8 + .align 8 +.LCPI0_0: + .quad 4602678819172646912 # double 0.5 + .text + .globl init_array + .align 16, 0x90 + .type init_array,@function +init_array: # @init_array + .cfi_startproc +# BB#0: # %entry + pushq %rbp +.Ltmp2: + .cfi_def_cfa_offset 16 +.Ltmp3: + .cfi_offset %rbp, -16 + movq %rsp, %rbp +.Ltmp4: + .cfi_def_cfa_register %rbp + xorl %r8d, %r8d + vmovsd .LCPI0_0(%rip), %xmm0 + .align 16, 0x90 +.LBB0_1: # %polly.loop_preheader3 + # =>This Loop Header: Depth=1 + # Child Loop BB0_2 Depth 2 + xorl %ecx, %ecx + .align 16, 0x90 +.LBB0_2: # %polly.loop_header2 + # Parent Loop BB0_1 Depth=1 + # => This Inner Loop Header: Depth=2 + movl %ecx, %edx + imull %r8d, %edx + movl %edx, %esi + sarl $31, %esi + shrl $22, %esi + addl %edx, %esi + andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00 + negl %esi + movq %r8, %rax + shlq $11, %rax + leal 1(%rdx,%rsi), %edi + leaq (%rax,%rax,2), %rsi + leaq 1(%rcx), %rdx + cmpq $1536, %rdx # imm = 0x600 + vcvtsi2sdl %edi, %xmm0, %xmm1 + vmulsd %xmm0, %xmm1, %xmm1 + vcvtsd2ss %xmm1, %xmm1, %xmm1 + vmovss %xmm1, A(%rsi,%rcx,4) + vmovss %xmm1, B(%rsi,%rcx,4) + movq %rdx, %rcx + jne .LBB0_2 +# BB#3: # %polly.loop_exit4 + # in Loop: Header=BB0_1 Depth=1 + incq %r8 + cmpq $1536, %r8 # imm = 0x600 + jne .LBB0_1 +# BB#4: # %polly.loop_exit + popq %rbp + ret +.Ltmp5: + .size init_array, .Ltmp5-init_array + .cfi_endproc + + .globl print_array + .align 16, 0x90 + .type print_array,@function +print_array: # @print_array + .cfi_startproc +# BB#0: # %entry + pushq %rbp +.Ltmp9: + .cfi_def_cfa_offset 16 +.Ltmp10: + .cfi_offset %rbp, -16 + movq %rsp, %rbp +.Ltmp11: + .cfi_def_cfa_register %rbp + pushq %r15 + pushq %r14 + pushq %r12 + pushq %rbx +.Ltmp12: + .cfi_offset %rbx, -48 +.Ltmp13: + .cfi_offset %r12, -40 +.Ltmp14: + .cfi_offset %r14, -32 +.Ltmp15: + .cfi_offset %r15, -24 + xorl %r14d, %r14d + movl $C, %r15d + .align 16, 0x90 +.LBB1_1: # %for.cond1.preheader + # =>This Loop Header: Depth=1 + # Child Loop BB1_2 Depth 2 + movq stdout(%rip), %rax + movq %r15, %r12 + xorl %ebx, %ebx + .align 16, 0x90 +.LBB1_2: # %for.body3 + # Parent Loop BB1_1 Depth=1 + # => This Inner Loop Header: Depth=2 + vmovss (%r12), %xmm0 + vcvtss2sd %xmm0, %xmm0, %xmm0 + movq %rax, %rdi + movl $.L.str, %esi + movb $1, %al + callq fprintf + movslq %ebx, %rax + imulq $1717986919, %rax, %rcx # imm = 0x66666667 + movq %rcx, %rdx + shrq $63, %rdx + sarq $37, %rcx + addl %edx, %ecx + imull $80, %ecx, %ecx + subl %ecx, %eax + cmpl $79, %eax + jne .LBB1_4 +# BB#3: # %if.then + # in Loop: Header=BB1_2 Depth=2 + movq stdout(%rip), %rsi + movl $10, %edi + callq fputc +.LBB1_4: # %for.inc + # in Loop: Header=BB1_2 Depth=2 + addq $4, %r12 + incq %rbx + movq stdout(%rip), %rax + cmpq $1536, %rbx # imm = 0x600 + jne .LBB1_2 +# BB#5: # %for.end + # in Loop: Header=BB1_1 Depth=1 + movl $10, %edi + movq %rax, %rsi + callq fputc + addq $6144, %r15 # imm = 0x1800 + incq %r14 + cmpq $1536, %r14 # imm = 0x600 + jne .LBB1_1 +# BB#6: # %for.end12 + popq %rbx + popq %r12 + popq %r14 + popq %r15 + popq %rbp + ret +.Ltmp16: + .size print_array, .Ltmp16-print_array + .cfi_endproc + + .section .rodata.cst8,"aM",@progbits,8 + .align 8 +.LCPI2_0: + .quad 4602678819172646912 # double 0.5 + .text + .globl main + .align 16, 0x90 + .type main,@function +main: # @main + .cfi_startproc +# BB#0: # %entry + pushq %rbp +.Ltmp20: + .cfi_def_cfa_offset 16 +.Ltmp21: + .cfi_offset %rbp, -16 + movq %rsp, %rbp +.Ltmp22: + .cfi_def_cfa_register %rbp + pushq %r14 + pushq %rbx +.Ltmp23: + .cfi_offset %rbx, -32 +.Ltmp24: + .cfi_offset %r14, -24 + xorl %ebx, %ebx + vmovsd .LCPI2_0(%rip), %xmm0 + .align 16, 0x90 +.LBB2_1: # %polly.loop_preheader3.i + # =>This Loop Header: Depth=1 + # Child Loop BB2_2 Depth 2 + xorl %ecx, %ecx + .align 16, 0x90 +.LBB2_2: # %polly.loop_header2.i + # Parent Loop BB2_1 Depth=1 + # => This Inner Loop Header: Depth=2 + movl %ecx, %edx + imull %ebx, %edx + movl %edx, %esi + sarl $31, %esi + shrl $22, %esi + addl %edx, %esi + andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00 + negl %esi + movq %rbx, %rax + shlq $11, %rax + leal 1(%rdx,%rsi), %edi + leaq (%rax,%rax,2), %rsi + leaq 1(%rcx), %rdx + cmpq $1536, %rdx # imm = 0x600 + vcvtsi2sdl %edi, %xmm0, %xmm1 + vmulsd %xmm0, %xmm1, %xmm1 + vcvtsd2ss %xmm1, %xmm1, %xmm1 + vmovss %xmm1, A(%rsi,%rcx,4) + vmovss %xmm1, B(%rsi,%rcx,4) + movq %rdx, %rcx + jne .LBB2_2 +# BB#3: # %polly.loop_exit4.i + # in Loop: Header=BB2_1 Depth=1 + incq %rbx + cmpq $1536, %rbx # imm = 0x600 + jne .LBB2_1 +# BB#4: # %polly.loop_preheader3.preheader + movl $C, %r14d + movl $C, %edi + xorl %esi, %esi + movl $9437184, %edx # imm = 0x900000 + callq memset + xorl %eax, %eax + .align 16, 0x90 +.LBB2_5: # %polly.loop_preheader17 + # =>This Loop Header: Depth=1 + # Child Loop BB2_10 Depth 2 + # Child Loop BB2_8 Depth 3 + movl $B, %ebx + xorl %edx, %edx + .align 16, 0x90 +.LBB2_10: # %polly.loop_preheader24 + # Parent Loop BB2_5 Depth=1 + # => This Loop Header: Depth=2 + # Child Loop BB2_8 Depth 3 + leaq (%rax,%rax,2), %rcx + shlq $11, %rcx + vmovss A(%rcx,%rdx,4), %xmm0 + movl $1536, %esi # imm = 0x600 + movq %r14, %rdi + movq %rbx, %rcx + .align 16, 0x90 +.LBB2_8: # %polly.loop_header23 + # Parent Loop BB2_5 Depth=1 + # Parent Loop BB2_10 Depth=2 + # => This Inner Loop Header: Depth=3 + vmulss (%rcx), %xmm0, %xmm1 + vaddss (%rdi), %xmm1, %xmm1 + vmovss %xmm1, (%rdi) + addq $4, %rdi + addq $4, %rcx + decq %rsi + jne .LBB2_8 +# BB#9: # %polly.loop_exit25 + # in Loop: Header=BB2_10 Depth=2 + addq $6144, %rbx # imm = 0x1800 + incq %rdx + cmpq $1536, %rdx # imm = 0x600 + jne .LBB2_10 +# BB#6: # %polly.loop_exit18 + # in Loop: Header=BB2_5 Depth=1 + addq $6144, %r14 # imm = 0x1800 + incq %rax + cmpq $1536, %rax # imm = 0x600 + jne .LBB2_5 +# BB#7: # %polly.loop_exit11 + xorl %eax, %eax + popq %rbx + popq %r14 + popq %rbp + ret +.Ltmp25: + .size main, .Ltmp25-main + .cfi_endproc + + .type A,@object # @A + .comm A,9437184,16 + .type B,@object # @B + .comm B,9437184,16 + .type .L.str,@object # @.str + .section .rodata.str1.1,"aMS",@progbits,1 +.L.str: + .asciz "%lf " + .size .L.str, 5 + + .type C,@object # @C + .comm C,9437184,16 + + .section ".note.GNU-stack","",@progbits diff --git a/polly/docs/experiments/matmul/matmul.preopt.ll b/polly/docs/experiments/matmul/matmul.preopt.ll new file mode 100644 index 000000000000..db5366425740 --- /dev/null +++ b/polly/docs/experiments/matmul/matmul.preopt.ll @@ -0,0 +1,171 @@ +; ModuleID = 'matmul.s' +source_filename = "matmul.c" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@A = common global [1536 x [1536 x float]] zeroinitializer, align 16 +@B = common global [1536 x [1536 x float]] zeroinitializer, align 16 +@stdout = external global %struct._IO_FILE*, align 8 +@.str = private unnamed_addr constant [5 x i8] c"%lf \00", align 1 +@C = common global [1536 x [1536 x float]] zeroinitializer, align 16 +@.str.1 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1 + +; Function Attrs: nounwind uwtable +define void @init_array() #0 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %entry.split, %for.inc17 + %indvars.iv5 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next6, %for.inc17 ] + br label %for.body3 + +for.body3: ; preds = %for.cond1.preheader, %for.body3 + %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ] + %0 = mul nuw nsw i64 %indvars.iv, %indvars.iv5 + %1 = trunc i64 %0 to i32 + %rem = srem i32 %1, 1024 + %add = add nsw i32 %rem, 1 + %conv = sitofp i32 %add to double + %div = fmul double %conv, 5.000000e-01 + %conv4 = fptrunc double %div to float + %arrayidx6 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %indvars.iv5, i64 %indvars.iv + store float %conv4, float* %arrayidx6, align 4 + %2 = mul nuw nsw i64 %indvars.iv, %indvars.iv5 + %3 = trunc i64 %2 to i32 + %rem8 = srem i32 %3, 1024 + %add9 = add nsw i32 %rem8, 1 + %conv10 = sitofp i32 %add9 to double + %div11 = fmul double %conv10, 5.000000e-01 + %conv12 = fptrunc double %div11 to float + %arrayidx16 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %indvars.iv5, i64 %indvars.iv + store float %conv12, float* %arrayidx16, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 1536 + br i1 %exitcond, label %for.body3, label %for.inc17 + +for.inc17: ; preds = %for.body3 + %indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1 + %exitcond7 = icmp ne i64 %indvars.iv.next6, 1536 + br i1 %exitcond7, label %for.cond1.preheader, label %for.end19 + +for.end19: ; preds = %for.inc17 + ret void +} + +; Function Attrs: nounwind uwtable +define void @print_array() #0 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %entry.split, %for.end + %indvars.iv6 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next7, %for.end ] + %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8 + br label %for.body3 + +for.body3: ; preds = %for.cond1.preheader, %for.inc + %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.inc ] + %1 = phi %struct._IO_FILE* [ %0, %for.cond1.preheader ], [ %5, %for.inc ] + %arrayidx5 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %indvars.iv6, i64 %indvars.iv + %2 = load float, float* %arrayidx5, align 4 + %conv = fpext float %2 to double + %call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), double %conv) #2 + %3 = trunc i64 %indvars.iv to i32 + %rem = srem i32 %3, 80 + %cmp6 = icmp eq i32 %rem, 79 + br i1 %cmp6, label %if.then, label %for.inc + +if.then: ; preds = %for.body3 + %4 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8 + %fputc3 = tail call i32 @fputc(i32 10, %struct._IO_FILE* %4) + br label %for.inc + +for.inc: ; preds = %for.body3, %if.then + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %5 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8 + %exitcond = icmp ne i64 %indvars.iv.next, 1536 + br i1 %exitcond, label %for.body3, label %for.end + +for.end: ; preds = %for.inc + %.lcssa = phi %struct._IO_FILE* [ %5, %for.inc ] + %fputc = tail call i32 @fputc(i32 10, %struct._IO_FILE* %.lcssa) + %indvars.iv.next7 = add nuw nsw i64 %indvars.iv6, 1 + %exitcond8 = icmp ne i64 %indvars.iv.next7, 1536 + br i1 %exitcond8, label %for.cond1.preheader, label %for.end12 + +for.end12: ; preds = %for.end + ret void +} + +declare i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1 + +; Function Attrs: nounwind uwtable +define i32 @main() #0 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + tail call void @init_array() + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %entry.split, %for.inc28 + %indvars.iv7 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next8, %for.inc28 ] + br label %for.body3 + +for.body3: ; preds = %for.cond1.preheader, %for.inc25 + %indvars.iv4 = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next5, %for.inc25 ] + %arrayidx5 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4 + store float 0.000000e+00, float* %arrayidx5, align 4 + br label %for.body8 + +for.body8: ; preds = %for.body3, %for.body8 + %indvars.iv = phi i64 [ 0, %for.body3 ], [ %indvars.iv.next, %for.body8 ] + %arrayidx12 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4 + %0 = load float, float* %arrayidx12, align 4 + %arrayidx16 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %indvars.iv7, i64 %indvars.iv + %1 = load float, float* %arrayidx16, align 4 + %arrayidx20 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %indvars.iv, i64 %indvars.iv4 + %2 = load float, float* %arrayidx20, align 4 + %mul = fmul float %1, %2 + %add = fadd float %0, %mul + %arrayidx24 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4 + store float %add, float* %arrayidx24, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 1536 + br i1 %exitcond, label %for.body8, label %for.inc25 + +for.inc25: ; preds = %for.body8 + %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1 + %exitcond6 = icmp ne i64 %indvars.iv.next5, 1536 + br i1 %exitcond6, label %for.body3, label %for.inc28 + +for.inc28: ; preds = %for.inc25 + %indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1 + %exitcond9 = icmp ne i64 %indvars.iv.next8, 1536 + br i1 %exitcond9, label %for.cond1.preheader, label %for.end30 + +for.end30: ; preds = %for.inc28 + ret i32 0 +} + +; Function Attrs: nounwind +declare i64 @fwrite(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) #2 + +; Function Attrs: nounwind +declare i32 @fputc(i32, %struct._IO_FILE* nocapture) #2 + +attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 4.0.0 (http://llvm.org/git/clang.git 081569d9a29c7bc827b2d41f8e62891bbc895e2f) (http://llvm.org/git/llvm.git e117e506536626352e8e47f6c72cd6e2a276622c)"} diff --git a/polly/docs/experiments/matmul/matmul.s b/polly/docs/experiments/matmul/matmul.s new file mode 100644 index 000000000000..17147be24476 --- /dev/null +++ b/polly/docs/experiments/matmul/matmul.s @@ -0,0 +1,269 @@ +; ModuleID = 'matmul.c' +source_filename = "matmul.c" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@A = common global [1536 x [1536 x float]] zeroinitializer, align 16 +@B = common global [1536 x [1536 x float]] zeroinitializer, align 16 +@stdout = external global %struct._IO_FILE*, align 8 +@.str = private unnamed_addr constant [5 x i8] c"%lf \00", align 1 +@C = common global [1536 x [1536 x float]] zeroinitializer, align 16 +@.str.1 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1 + +; Function Attrs: nounwind uwtable +define void @init_array() #0 { +entry: + %i = alloca i32, align 4 + %j = alloca i32, align 4 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc17, %entry + %0 = load i32, i32* %i, align 4 + %cmp = icmp slt i32 %0, 1536 + br i1 %cmp, label %for.body, label %for.end19 + +for.body: ; preds = %for.cond + store i32 0, i32* %j, align 4 + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %1 = load i32, i32* %j, align 4 + %cmp2 = icmp slt i32 %1, 1536 + br i1 %cmp2, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %2 = load i32, i32* %i, align 4 + %3 = load i32, i32* %j, align 4 + %mul = mul nsw i32 %2, %3 + %rem = srem i32 %mul, 1024 + %add = add nsw i32 1, %rem + %conv = sitofp i32 %add to double + %div = fdiv double %conv, 2.000000e+00 + %conv4 = fptrunc double %div to float + %4 = load i32, i32* %j, align 4 + %idxprom = sext i32 %4 to i64 + %5 = load i32, i32* %i, align 4 + %idxprom5 = sext i32 %5 to i64 + %arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %idxprom5 + %arrayidx6 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom + store float %conv4, float* %arrayidx6, align 4 + %6 = load i32, i32* %i, align 4 + %7 = load i32, i32* %j, align 4 + %mul7 = mul nsw i32 %6, %7 + %rem8 = srem i32 %mul7, 1024 + %add9 = add nsw i32 1, %rem8 + %conv10 = sitofp i32 %add9 to double + %div11 = fdiv double %conv10, 2.000000e+00 + %conv12 = fptrunc double %div11 to float + %8 = load i32, i32* %j, align 4 + %idxprom13 = sext i32 %8 to i64 + %9 = load i32, i32* %i, align 4 + %idxprom14 = sext i32 %9 to i64 + %arrayidx15 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %idxprom14 + %arrayidx16 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx15, i64 0, i64 %idxprom13 + store float %conv12, float* %arrayidx16, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3 + %10 = load i32, i32* %j, align 4 + %inc = add nsw i32 %10, 1 + store i32 %inc, i32* %j, align 4 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc17 + +for.inc17: ; preds = %for.end + %11 = load i32, i32* %i, align 4 + %inc18 = add nsw i32 %11, 1 + store i32 %inc18, i32* %i, align 4 + br label %for.cond + +for.end19: ; preds = %for.cond + ret void +} + +; Function Attrs: nounwind uwtable +define void @print_array() #0 { +entry: + %i = alloca i32, align 4 + %j = alloca i32, align 4 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc10, %entry + %0 = load i32, i32* %i, align 4 + %cmp = icmp slt i32 %0, 1536 + br i1 %cmp, label %for.body, label %for.end12 + +for.body: ; preds = %for.cond + store i32 0, i32* %j, align 4 + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %1 = load i32, i32* %j, align 4 + %cmp2 = icmp slt i32 %1, 1536 + br i1 %cmp2, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %2 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8 + %3 = load i32, i32* %j, align 4 + %idxprom = sext i32 %3 to i64 + %4 = load i32, i32* %i, align 4 + %idxprom4 = sext i32 %4 to i64 + %arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom4 + %arrayidx5 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom + %5 = load float, float* %arrayidx5, align 4 + %conv = fpext float %5 to double + %call = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %2, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), double %conv) + %6 = load i32, i32* %j, align 4 + %rem = srem i32 %6, 80 + %cmp6 = icmp eq i32 %rem, 79 + br i1 %cmp6, label %if.then, label %if.end + +if.then: ; preds = %for.body3 + %7 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8 + %call8 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %7, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0)) + br label %if.end + +if.end: ; preds = %if.then, %for.body3 + br label %for.inc + +for.inc: ; preds = %if.end + %8 = load i32, i32* %j, align 4 + %inc = add nsw i32 %8, 1 + store i32 %inc, i32* %j, align 4 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + %9 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8 + %call9 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %9, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0)) + br label %for.inc10 + +for.inc10: ; preds = %for.end + %10 = load i32, i32* %i, align 4 + %inc11 = add nsw i32 %10, 1 + store i32 %inc11, i32* %i, align 4 + br label %for.cond + +for.end12: ; preds = %for.cond + ret void +} + +declare i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1 + +; Function Attrs: nounwind uwtable +define i32 @main() #0 { +entry: + %retval = alloca i32, align 4 + %i = alloca i32, align 4 + %j = alloca i32, align 4 + %k = alloca i32, align 4 + %t_start = alloca double, align 8 + %t_end = alloca double, align 8 + store i32 0, i32* %retval, align 4 + call void @init_array() + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc28, %entry + %0 = load i32, i32* %i, align 4 + %cmp = icmp slt i32 %0, 1536 + br i1 %cmp, label %for.body, label %for.end30 + +for.body: ; preds = %for.cond + store i32 0, i32* %j, align 4 + br label %for.cond1 + +for.cond1: ; preds = %for.inc25, %for.body + %1 = load i32, i32* %j, align 4 + %cmp2 = icmp slt i32 %1, 1536 + br i1 %cmp2, label %for.body3, label %for.end27 + +for.body3: ; preds = %for.cond1 + %2 = load i32, i32* %j, align 4 + %idxprom = sext i32 %2 to i64 + %3 = load i32, i32* %i, align 4 + %idxprom4 = sext i32 %3 to i64 + %arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom4 + %arrayidx5 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom + store float 0.000000e+00, float* %arrayidx5, align 4 + store i32 0, i32* %k, align 4 + br label %for.cond6 + +for.cond6: ; preds = %for.inc, %for.body3 + %4 = load i32, i32* %k, align 4 + %cmp7 = icmp slt i32 %4, 1536 + br i1 %cmp7, label %for.body8, label %for.end + +for.body8: ; preds = %for.cond6 + %5 = load i32, i32* %j, align 4 + %idxprom9 = sext i32 %5 to i64 + %6 = load i32, i32* %i, align 4 + %idxprom10 = sext i32 %6 to i64 + %arrayidx11 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom10 + %arrayidx12 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx11, i64 0, i64 %idxprom9 + %7 = load float, float* %arrayidx12, align 4 + %8 = load i32, i32* %k, align 4 + %idxprom13 = sext i32 %8 to i64 + %9 = load i32, i32* %i, align 4 + %idxprom14 = sext i32 %9 to i64 + %arrayidx15 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %idxprom14 + %arrayidx16 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx15, i64 0, i64 %idxprom13 + %10 = load float, float* %arrayidx16, align 4 + %11 = load i32, i32* %j, align 4 + %idxprom17 = sext i32 %11 to i64 + %12 = load i32, i32* %k, align 4 + %idxprom18 = sext i32 %12 to i64 + %arrayidx19 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %idxprom18 + %arrayidx20 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx19, i64 0, i64 %idxprom17 + %13 = load float, float* %arrayidx20, align 4 + %mul = fmul float %10, %13 + %add = fadd float %7, %mul + %14 = load i32, i32* %j, align 4 + %idxprom21 = sext i32 %14 to i64 + %15 = load i32, i32* %i, align 4 + %idxprom22 = sext i32 %15 to i64 + %arrayidx23 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom22 + %arrayidx24 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx23, i64 0, i64 %idxprom21 + store float %add, float* %arrayidx24, align 4 + br label %for.inc + +for.inc: ; preds = %for.body8 + %16 = load i32, i32* %k, align 4 + %inc = add nsw i32 %16, 1 + store i32 %inc, i32* %k, align 4 + br label %for.cond6 + +for.end: ; preds = %for.cond6 + br label %for.inc25 + +for.inc25: ; preds = %for.end + %17 = load i32, i32* %j, align 4 + %inc26 = add nsw i32 %17, 1 + store i32 %inc26, i32* %j, align 4 + br label %for.cond1 + +for.end27: ; preds = %for.cond1 + br label %for.inc28 + +for.inc28: ; preds = %for.end27 + %18 = load i32, i32* %i, align 4 + %inc29 = add nsw i32 %18, 1 + store i32 %inc29, i32* %i, align 4 + br label %for.cond + +for.end30: ; preds = %for.cond + ret i32 0 +} + +attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 4.0.0 (http://llvm.org/git/clang.git 081569d9a29c7bc827b2d41f8e62891bbc895e2f) (http://llvm.org/git/llvm.git e117e506536626352e8e47f6c72cd6e2a276622c)"} diff --git a/polly/docs/experiments/matmul/runall.sh b/polly/docs/experiments/matmul/runall.sh new file mode 100755 index 000000000000..575b58f98246 --- /dev/null +++ b/polly/docs/experiments/matmul/runall.sh @@ -0,0 +1,95 @@ +#!/bin/sh -a + +echo "--> 1. Create LLVM-IR from C" +clang -S -emit-llvm matmul.c -o matmul.s + +echo "--> 2. Prepare the LLVM-IR for Polly" +opt -S -polly-canonicalize matmul.s > matmul.preopt.ll + +echo "--> 3. Show the SCoPs detected by Polly" +opt -basicaa -polly-ast -analyze -q matmul.preopt.ll \ + -polly-process-unprofitable + +echo "--> 4.1 Highlight the detected SCoPs in the CFGs of the program" +# We only create .dot files, as directly -view-scops directly calls graphviz +# which would require user interaction to continue the script. +# opt -basicaa -view-scops -disable-output matmul.preopt.ll +opt -basicaa -dot-scops -disable-output matmul.preopt.ll + +echo "--> 4.2 Highlight the detected SCoPs in the CFGs of the program (print \ +no instructions)" +# We only create .dot files, as directly -view-scops-only directly calls +# graphviz which would require user interaction to continue the script. +# opt -basicaa -view-scops-only -disable-output matmul.preopt.ll +opt -basicaa -dot-scops-only -disable-output matmul.preopt.ll + +echo "--> 4.3 Create .png files from the .dot files" +for i in `ls *.dot`; do dot -Tpng $i > $i.png; done + +echo "--> 5. View the polyhedral representation of the SCoPs" +opt -basicaa -polly-scops -analyze matmul.preopt.ll -polly-process-unprofitable + +echo "--> 6. Show the dependences for the SCoPs" +opt -basicaa -polly-dependences -analyze matmul.preopt.ll \ + -polly-process-unprofitable + +echo "--> 7. Export jscop files" +opt -basicaa -polly-export-jscop matmul.preopt.ll -polly-process-unprofitable + +echo "--> 8. Import the updated jscop files and print the new SCoPs. (optional)" +opt -basicaa -polly-import-jscop -polly-ast -analyze matmul.preopt.ll \ + -polly-process-unprofitable +opt -basicaa -polly-import-jscop -polly-ast -analyze matmul.preopt.ll \ + -polly-import-jscop-postfix=interchanged -polly-process-unprofitable +opt -basicaa -polly-import-jscop -polly-ast -analyze matmul.preopt.ll \ + -polly-import-jscop-postfix=interchanged+tiled -polly-process-unprofitable +opt -basicaa -polly-import-jscop -polly-ast -analyze matmul.preopt.ll \ + -polly-import-jscop-postfix=interchanged+tiled+vector \ + -polly-process-unprofitable + +echo "--> 9. Codegenerate the SCoPs" +opt -basicaa -polly-import-jscop -polly-import-jscop-postfix=interchanged \ + -polly-codegen -polly-process-unprofitable\ + matmul.preopt.ll | opt -O3 > matmul.polly.interchanged.ll +opt -basicaa -polly-import-jscop \ + -polly-import-jscop-postfix=interchanged+tiled -polly-codegen \ + matmul.preopt.ll -polly-process-unprofitable \ + | opt -O3 > matmul.polly.interchanged+tiled.ll +opt -basicaa -polly-import-jscop -polly-process-unprofitable\ + -polly-import-jscop-postfix=interchanged+tiled+vector -polly-codegen \ + matmul.preopt.ll -polly-vectorizer=polly\ + | opt -O3 > matmul.polly.interchanged+tiled+vector.ll +opt -basicaa -polly-import-jscop -polly-process-unprofitable\ + -polly-import-jscop-postfix=interchanged+tiled+vector -polly-codegen \ + matmul.preopt.ll -polly-vectorizer=polly -polly-parallel\ + | opt -O3 > matmul.polly.interchanged+tiled+vector+openmp.ll +opt matmul.preopt.ll | opt -O3 > matmul.normalopt.ll + +echo "--> 10. Create the executables" +llc matmul.polly.interchanged.ll -o matmul.polly.interchanged.s && gcc matmul.polly.interchanged.s \ + -o matmul.polly.interchanged.exe +llc matmul.polly.interchanged+tiled.ll -o matmul.polly.interchanged+tiled.s && gcc matmul.polly.interchanged+tiled.s \ + -o matmul.polly.interchanged+tiled.exe +llc matmul.polly.interchanged+tiled+vector.ll \ + -o matmul.polly.interchanged+tiled+vector.s \ + && gcc matmul.polly.interchanged+tiled+vector.s \ + -o matmul.polly.interchanged+tiled+vector.exe +llc matmul.polly.interchanged+tiled+vector+openmp.ll \ + -o matmul.polly.interchanged+tiled+vector+openmp.s \ + && gcc -lgomp matmul.polly.interchanged+tiled+vector+openmp.s \ + -o matmul.polly.interchanged+tiled+vector+openmp.exe +llc matmul.normalopt.ll -o matmul.normalopt.s && gcc matmul.normalopt.s \ + -o matmul.normalopt.exe + +echo "--> 11. Compare the runtime of the executables" + +echo "time ./matmul.normalopt.exe" +time -f "%E real, %U user, %S sys" ./matmul.normalopt.exe +echo "time ./matmul.polly.interchanged.exe" +time -f "%E real, %U user, %S sys" ./matmul.polly.interchanged.exe +echo "time ./matmul.polly.interchanged+tiled.exe" +time -f "%E real, %U user, %S sys" ./matmul.polly.interchanged+tiled.exe +echo "time ./matmul.polly.interchanged+tiled+vector.exe" +time -f "%E real, %U user, %S sys" ./matmul.polly.interchanged+tiled+vector.exe +echo "time ./matmul.polly.interchanged+tiled+vector+openmp.exe" +time -f "%E real, %U user, %S sys" ./matmul.polly.interchanged+tiled+vector+openmp.exe diff --git a/polly/docs/experiments/matmul/scops.init_array.dot b/polly/docs/experiments/matmul/scops.init_array.dot new file mode 100644 index 000000000000..3b9d6c9c5865 --- /dev/null +++ b/polly/docs/experiments/matmul/scops.init_array.dot @@ -0,0 +1,39 @@ +digraph "Scop Graph for 'init_array' function" { + label="Scop Graph for 'init_array' function"; + + Node0x5b5b5a0 [shape=record,label="{entry:\l br label %entry.split\l}"]; + Node0x5b5b5a0 -> Node0x5b5de30; + Node0x5b5de30 [shape=record,label="{entry.split: \l br label %for.cond1.preheader\l}"]; + Node0x5b5de30 -> Node0x5b5de50; + Node0x5b5de50 [shape=record,label="{for.cond1.preheader: \l %indvars.iv5 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next6, %for.inc17 ]\l br label %for.body3\l}"]; + Node0x5b5de50 -> Node0x5b5b570; + Node0x5b5b570 [shape=record,label="{for.body3: \l %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next,\l... %for.body3 ]\l %0 = mul nuw nsw i64 %indvars.iv, %indvars.iv5\l %1 = trunc i64 %0 to i32\l %rem = srem i32 %1, 1024\l %add = add nsw i32 %rem, 1\l %conv = sitofp i32 %add to double\l %div = fmul double %conv, 5.000000e-01\l %conv4 = fptrunc double %div to float\l %arrayidx6 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x\l... float]]* @A, i64 0, i64 %indvars.iv5, i64 %indvars.iv\l store float %conv4, float* %arrayidx6, align 4\l %2 = mul nuw nsw i64 %indvars.iv, %indvars.iv5\l %3 = trunc i64 %2 to i32\l %rem8 = srem i32 %3, 1024\l %add9 = add nsw i32 %rem8, 1\l %conv10 = sitofp i32 %add9 to double\l %div11 = fmul double %conv10, 5.000000e-01\l %conv12 = fptrunc double %div11 to float\l %arrayidx16 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @B, i64 0, i64 %indvars.iv5, i64 %indvars.iv\l store float %conv12, float* %arrayidx16, align 4\l %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\l %exitcond = icmp ne i64 %indvars.iv.next, 1536\l br i1 %exitcond, label %for.body3, label %for.inc17\l}"]; + Node0x5b5b570 -> Node0x5b5b570[constraint=false]; + Node0x5b5b570 -> Node0x5b5df30; + Node0x5b5df30 [shape=record,label="{for.inc17: \l %indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1\l %exitcond7 = icmp ne i64 %indvars.iv.next6, 1536\l br i1 %exitcond7, label %for.cond1.preheader, label %for.end19\l}"]; + Node0x5b5df30 -> Node0x5b5de50[constraint=false]; + Node0x5b5df30 -> Node0x5b5df90; + Node0x5b5df90 [shape=record,label="{for.end19: \l ret void\l}"]; + colorscheme = "paired12" + subgraph cluster_0x5b4bdd0 { + label = ""; + style = solid; + color = 1 + subgraph cluster_0x5b4bf50 { + label = "Region can not profitably be optimized!"; + style = solid; + color = 6 + subgraph cluster_0x5b4c0d0 { + label = ""; + style = solid; + color = 5 + Node0x5b5b570; + } + Node0x5b5de50; + Node0x5b5df30; + } + Node0x5b5b5a0; + Node0x5b5de30; + Node0x5b5df90; + } +} diff --git a/polly/docs/experiments/matmul/scops.init_array.dot.png b/polly/docs/experiments/matmul/scops.init_array.dot.png Binary files differnew file mode 100644 index 000000000000..48a9f38946a9 --- /dev/null +++ b/polly/docs/experiments/matmul/scops.init_array.dot.png diff --git a/polly/docs/experiments/matmul/scops.main.dot b/polly/docs/experiments/matmul/scops.main.dot new file mode 100644 index 000000000000..e4abe8fbec88 --- /dev/null +++ b/polly/docs/experiments/matmul/scops.main.dot @@ -0,0 +1,50 @@ +digraph "Scop Graph for 'main' function" { + label="Scop Graph for 'main' function"; + + Node0x5b5c850 [shape=record,label="{entry:\l br label %entry.split\l}"]; + Node0x5b5c850 -> Node0x5b5a440; + Node0x5b5a440 [shape=record,label="{entry.split: \l tail call void @init_array()\l br label %for.cond1.preheader\l}"]; + Node0x5b5a440 -> Node0x5b38cd0; + Node0x5b38cd0 [shape=record,label="{for.cond1.preheader: \l %indvars.iv7 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next8, %for.inc28 ]\l br label %for.body3\l}"]; + Node0x5b38cd0 -> Node0x5b4bd30; + Node0x5b4bd30 [shape=record,label="{for.body3: \l %indvars.iv4 = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next5,\l... %for.inc25 ]\l %arrayidx5 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x\l... float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4\l store float 0.000000e+00, float* %arrayidx5, align 4\l br label %for.body8\l}"]; + Node0x5b4bd30 -> Node0x5b38c50; + Node0x5b38c50 [shape=record,label="{for.body8: \l %indvars.iv = phi i64 [ 0, %for.body3 ], [ %indvars.iv.next, %for.body8 ]\l %arrayidx12 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4\l %0 = load float, float* %arrayidx12, align 4\l %arrayidx16 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @A, i64 0, i64 %indvars.iv7, i64 %indvars.iv\l %1 = load float, float* %arrayidx16, align 4\l %arrayidx20 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @B, i64 0, i64 %indvars.iv, i64 %indvars.iv4\l %2 = load float, float* %arrayidx20, align 4\l %mul = fmul float %1, %2\l %add = fadd float %0, %mul\l %arrayidx24 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4\l store float %add, float* %arrayidx24, align 4\l %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\l %exitcond = icmp ne i64 %indvars.iv.next, 1536\l br i1 %exitcond, label %for.body8, label %for.inc25\l}"]; + Node0x5b38c50 -> Node0x5b38c50[constraint=false]; + Node0x5b38c50 -> Node0x5b5a290; + Node0x5b5a290 [shape=record,label="{for.inc25: \l %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1\l %exitcond6 = icmp ne i64 %indvars.iv.next5, 1536\l br i1 %exitcond6, label %for.body3, label %for.inc28\l}"]; + Node0x5b5a290 -> Node0x5b4bd30[constraint=false]; + Node0x5b5a290 -> Node0x5b5a340; + Node0x5b5a340 [shape=record,label="{for.inc28: \l %indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1\l %exitcond9 = icmp ne i64 %indvars.iv.next8, 1536\l br i1 %exitcond9, label %for.cond1.preheader, label %for.end30\l}"]; + Node0x5b5a340 -> Node0x5b38cd0[constraint=false]; + Node0x5b5a340 -> Node0x5b5a3a0; + Node0x5b5a3a0 [shape=record,label="{for.end30: \l ret i32 0\l}"]; + colorscheme = "paired12" + subgraph cluster_0x5b5c970 { + label = ""; + style = solid; + color = 1 + subgraph cluster_0x5b5c5a0 { + label = ""; + style = filled; + color = 3 subgraph cluster_0x5b5c9f0 { + label = ""; + style = solid; + color = 5 + subgraph cluster_0x5b5c110 { + label = ""; + style = solid; + color = 7 + Node0x5b38c50; + } + Node0x5b4bd30; + Node0x5b5a290; + } + Node0x5b38cd0; + Node0x5b5a340; + } + Node0x5b5c850; + Node0x5b5a440; + Node0x5b5a3a0; + } +} diff --git a/polly/docs/experiments/matmul/scops.main.dot.png b/polly/docs/experiments/matmul/scops.main.dot.png Binary files differnew file mode 100644 index 000000000000..4e73701a08d7 --- /dev/null +++ b/polly/docs/experiments/matmul/scops.main.dot.png diff --git a/polly/docs/experiments/matmul/scops.print_array.dot b/polly/docs/experiments/matmul/scops.print_array.dot new file mode 100644 index 000000000000..748ccb170cd0 --- /dev/null +++ b/polly/docs/experiments/matmul/scops.print_array.dot @@ -0,0 +1,51 @@ +digraph "Scop Graph for 'print_array' function" { + label="Scop Graph for 'print_array' function"; + + Node0x5b5ee00 [shape=record,label="{entry:\l br label %entry.split\l}"]; + Node0x5b5ee00 -> Node0x5b5ee50; + Node0x5b5ee50 [shape=record,label="{entry.split: \l br label %for.cond1.preheader\l}"]; + Node0x5b5ee50 -> Node0x5b5ee70; + Node0x5b5ee70 [shape=record,label="{for.cond1.preheader: \l %indvars.iv6 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next7, %for.end ]\l %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8\l br label %for.body3\l}"]; + Node0x5b5ee70 -> Node0x5b5ee20; + Node0x5b5ee20 [shape=record,label="{for.body3: \l %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next,\l... %for.inc ]\l %1 = phi %struct._IO_FILE* [ %0, %for.cond1.preheader ], [ %5, %for.inc ]\l %arrayidx5 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x\l... float]]* @C, i64 0, i64 %indvars.iv6, i64 %indvars.iv\l %2 = load float, float* %arrayidx5, align 4\l %conv = fpext float %2 to double\l %call = tail call i32 (%struct._IO_FILE*, i8*, ...)\l... @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x\l... i8]* @.str, i64 0, i64 0), double %conv) #2\l %3 = trunc i64 %indvars.iv to i32\l %rem = srem i32 %3, 80\l %cmp6 = icmp eq i32 %rem, 79\l br i1 %cmp6, label %if.then, label %for.inc\l}"]; + Node0x5b5ee20 -> Node0x5b60d10; + Node0x5b5ee20 -> Node0x5b60d70; + Node0x5b60d10 [shape=record,label="{if.then: \l %4 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8\l %fputc3 = tail call i32 @fputc(i32 10, %struct._IO_FILE* %4)\l br label %for.inc\l}"]; + Node0x5b60d10 -> Node0x5b60d70; + Node0x5b60d70 [shape=record,label="{for.inc: \l %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\l %5 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8\l %exitcond = icmp ne i64 %indvars.iv.next, 1536\l br i1 %exitcond, label %for.body3, label %for.end\l}"]; + Node0x5b60d70 -> Node0x5b5ee20[constraint=false]; + Node0x5b60d70 -> Node0x5b60e10; + Node0x5b60e10 [shape=record,label="{for.end: \l %.lcssa = phi %struct._IO_FILE* [ %5, %for.inc ]\l %fputc = tail call i32 @fputc(i32 10, %struct._IO_FILE* %.lcssa)\l %indvars.iv.next7 = add nuw nsw i64 %indvars.iv6, 1\l %exitcond8 = icmp ne i64 %indvars.iv.next7, 1536\l br i1 %exitcond8, label %for.cond1.preheader, label %for.end12\l}"]; + Node0x5b60e10 -> Node0x5b5ee70[constraint=false]; + Node0x5b60e10 -> Node0x5b60e70; + Node0x5b60e70 [shape=record,label="{for.end12: \l ret void\l}"]; + colorscheme = "paired12" + subgraph cluster_0x5b349a0 { + label = ""; + style = solid; + color = 1 + subgraph cluster_0x5b5c2c0 { + label = "Call instruction: %call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), double %conv) #2"; + style = solid; + color = 6 + subgraph cluster_0x5b5c240 { + label = "Call instruction: %call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), double %conv) #2"; + style = solid; + color = 5 + subgraph cluster_0x5b34a20 { + label = "Region can not profitably be optimized!"; + style = solid; + color = 7 + Node0x5b5ee20; + Node0x5b60d10; + } + Node0x5b60d70; + } + Node0x5b5ee70; + Node0x5b60e10; + } + Node0x5b5ee00; + Node0x5b5ee50; + Node0x5b60e70; + } +} diff --git a/polly/docs/experiments/matmul/scops.print_array.dot.png b/polly/docs/experiments/matmul/scops.print_array.dot.png Binary files differnew file mode 100644 index 000000000000..e3b973b131ab --- /dev/null +++ b/polly/docs/experiments/matmul/scops.print_array.dot.png diff --git a/polly/docs/experiments/matmul/scopsonly.init_array.dot b/polly/docs/experiments/matmul/scopsonly.init_array.dot new file mode 100644 index 000000000000..3d2092b21c93 --- /dev/null +++ b/polly/docs/experiments/matmul/scopsonly.init_array.dot @@ -0,0 +1,39 @@ +digraph "Scop Graph for 'init_array' function" { + label="Scop Graph for 'init_array' function"; + + Node0x5ae2570 [shape=record,label="{entry}"]; + Node0x5ae2570 -> Node0x5ae4e90; + Node0x5ae4e90 [shape=record,label="{entry.split}"]; + Node0x5ae4e90 -> Node0x5ae4f50; + Node0x5ae4f50 [shape=record,label="{for.cond1.preheader}"]; + Node0x5ae4f50 -> Node0x5ae50e0; + Node0x5ae50e0 [shape=record,label="{for.body3}"]; + Node0x5ae50e0 -> Node0x5ae50e0[constraint=false]; + Node0x5ae50e0 -> Node0x5ae5100; + Node0x5ae5100 [shape=record,label="{for.inc17}"]; + Node0x5ae5100 -> Node0x5ae4f50[constraint=false]; + Node0x5ae5100 -> Node0x5ae4ff0; + Node0x5ae4ff0 [shape=record,label="{for.end19}"]; + colorscheme = "paired12" + subgraph cluster_0x5ad2dd0 { + label = ""; + style = solid; + color = 1 + subgraph cluster_0x5ad2f50 { + label = "Region can not profitably be optimized!"; + style = solid; + color = 6 + subgraph cluster_0x5ad30d0 { + label = ""; + style = solid; + color = 5 + Node0x5ae50e0; + } + Node0x5ae4f50; + Node0x5ae5100; + } + Node0x5ae2570; + Node0x5ae4e90; + Node0x5ae4ff0; + } +} diff --git a/polly/docs/experiments/matmul/scopsonly.init_array.dot.png b/polly/docs/experiments/matmul/scopsonly.init_array.dot.png Binary files differnew file mode 100644 index 000000000000..f101d4d30815 --- /dev/null +++ b/polly/docs/experiments/matmul/scopsonly.init_array.dot.png diff --git a/polly/docs/experiments/matmul/scopsonly.main.dot b/polly/docs/experiments/matmul/scopsonly.main.dot new file mode 100644 index 000000000000..c2d60c7ded64 --- /dev/null +++ b/polly/docs/experiments/matmul/scopsonly.main.dot @@ -0,0 +1,50 @@ +digraph "Scop Graph for 'main' function" { + label="Scop Graph for 'main' function"; + + Node0x5abfcf0 [shape=record,label="{entry}"]; + Node0x5abfcf0 -> Node0x5ade060; + Node0x5ade060 [shape=record,label="{entry.split}"]; + Node0x5ade060 -> Node0x5ade0e0; + Node0x5ade0e0 [shape=record,label="{for.cond1.preheader}"]; + Node0x5ade0e0 -> Node0x5ade100; + Node0x5ade100 [shape=record,label="{for.body3}"]; + Node0x5ade100 -> Node0x5ae0020; + Node0x5ae0020 [shape=record,label="{for.body8}"]; + Node0x5ae0020 -> Node0x5ae0020[constraint=false]; + Node0x5ae0020 -> Node0x5ae0080; + Node0x5ae0080 [shape=record,label="{for.inc25}"]; + Node0x5ae0080 -> Node0x5ade100[constraint=false]; + Node0x5ae0080 -> Node0x5adfef0; + Node0x5adfef0 [shape=record,label="{for.inc28}"]; + Node0x5adfef0 -> Node0x5ade0e0[constraint=false]; + Node0x5adfef0 -> Node0x5adff50; + Node0x5adff50 [shape=record,label="{for.end30}"]; + colorscheme = "paired12" + subgraph cluster_0x5ad2c80 { + label = ""; + style = solid; + color = 1 + subgraph cluster_0x5ad2e50 { + label = ""; + style = filled; + color = 3 subgraph cluster_0x5ad2d00 { + label = ""; + style = solid; + color = 5 + subgraph cluster_0x5ad2dd0 { + label = ""; + style = solid; + color = 7 + Node0x5ae0020; + } + Node0x5ade100; + Node0x5ae0080; + } + Node0x5ade0e0; + Node0x5adfef0; + } + Node0x5abfcf0; + Node0x5ade060; + Node0x5adff50; + } +} diff --git a/polly/docs/experiments/matmul/scopsonly.main.dot.png b/polly/docs/experiments/matmul/scopsonly.main.dot.png Binary files differnew file mode 100644 index 000000000000..32634243888d --- /dev/null +++ b/polly/docs/experiments/matmul/scopsonly.main.dot.png diff --git a/polly/docs/experiments/matmul/scopsonly.print_array.dot b/polly/docs/experiments/matmul/scopsonly.print_array.dot new file mode 100644 index 000000000000..0f7de45e8772 --- /dev/null +++ b/polly/docs/experiments/matmul/scopsonly.print_array.dot @@ -0,0 +1,51 @@ +digraph "Scop Graph for 'print_array' function" { + label="Scop Graph for 'print_array' function"; + + Node0x5ae5e30 [shape=record,label="{entry}"]; + Node0x5ae5e30 -> Node0x5ae5f50; + Node0x5ae5f50 [shape=record,label="{entry.split}"]; + Node0x5ae5f50 -> Node0x5ae7d90; + Node0x5ae7d90 [shape=record,label="{for.cond1.preheader}"]; + Node0x5ae7d90 -> Node0x5ae7f20; + Node0x5ae7f20 [shape=record,label="{for.body3}"]; + Node0x5ae7f20 -> Node0x5ae7f40; + Node0x5ae7f20 -> Node0x5ae7f60; + Node0x5ae7f40 [shape=record,label="{if.then}"]; + Node0x5ae7f40 -> Node0x5ae7f60; + Node0x5ae7f60 [shape=record,label="{for.inc}"]; + Node0x5ae7f60 -> Node0x5ae7f20[constraint=false]; + Node0x5ae7f60 -> Node0x5ae7e30; + Node0x5ae7e30 [shape=record,label="{for.end}"]; + Node0x5ae7e30 -> Node0x5ae7d90[constraint=false]; + Node0x5ae7e30 -> Node0x5ae8110; + Node0x5ae8110 [shape=record,label="{for.end12}"]; + colorscheme = "paired12" + subgraph cluster_0x5abb9a0 { + label = ""; + style = solid; + color = 1 + subgraph cluster_0x5ae32c0 { + label = "Call instruction: %call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), double %conv) #2"; + style = solid; + color = 6 + subgraph cluster_0x5ae3240 { + label = "Call instruction: %call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), double %conv) #2"; + style = solid; + color = 5 + subgraph cluster_0x5abba20 { + label = "Region can not profitably be optimized!"; + style = solid; + color = 7 + Node0x5ae7f20; + Node0x5ae7f40; + } + Node0x5ae7f60; + } + Node0x5ae7d90; + Node0x5ae7e30; + } + Node0x5ae5e30; + Node0x5ae5f50; + Node0x5ae8110; + } +} diff --git a/polly/docs/experiments/matmul/scopsonly.print_array.dot.png b/polly/docs/experiments/matmul/scopsonly.print_array.dot.png Binary files differnew file mode 100644 index 000000000000..b0d4b45aace4 --- /dev/null +++ b/polly/docs/experiments/matmul/scopsonly.print_array.dot.png |