summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: David Schleef <ds@schleef.org> 2010-07-07 20:40:59 -0700
committer: David Schleef <ds@schleef.org> 2010-07-09 16:57:18 -0700
commit: c2d0071b598d60c9dfdb40456ecea67931f734a7 (patch)
tree: 434dbb39422e85caed8b5a6c6f632d4ce0c27e3f
parent: 43c138021a9ab6e6879d8b3fe2d7fc117f1df9d0 (diff)
download: orc-c2d0071b598d60c9dfdb40456ecea67931f734a7.tar.gz
neon: Only preload for each cache line
-rw-r--r-- orc/orcprogram-neon.c 20
1 file changed, 11 insertions, 9 deletions
diff --git a/orc/orcprogram-neon.c b/orc/orcprogram-neon.c
index 26ef3c8..38defdd 100644
--- a/orc/orcprogram-neon.c
+++ b/orc/orcprogram-neon.c
@@ -16,7 +16,7 @@
#define SIZE 65536
-void orc_neon_emit_loop (OrcCompiler *compiler);
+void orc_neon_emit_loop (OrcCompiler *compiler, int unroll_index);
void orc_compiler_neon_register_rules (OrcTarget *target);
unsigned int orc_compiler_neon_get_default_flags (void);
@@ -465,7 +465,7 @@ orc_neon_restore_unalignment (OrcCompiler *compiler)
}
void
-orc_neon_emit_load_src (OrcCompiler *compiler, OrcVariable *var)
+orc_neon_emit_load_src (OrcCompiler *compiler, OrcVariable *var, int unroll_index)
{
int ptr_reg;
int update;
@@ -502,6 +502,7 @@ orc_neon_emit_load_src (OrcCompiler *compiler, OrcVariable *var)
ORC_ERROR("bad size");
}
+ if (unroll_index == 0) {
switch (compiler->size_region) {
case 0:
case 1:
@@ -512,6 +513,7 @@ orc_neon_emit_load_src (OrcCompiler *compiler, OrcVariable *var)
orc_neon_preload (compiler, var, FALSE, 208);
break;
}
+ }
}
void
@@ -700,7 +702,7 @@ orc_compiler_neon_assemble (OrcCompiler *compiler)
orc_arm_emit_branch (compiler, ORC_ARM_COND_EQ, LABEL_REGION1_SKIP);
orc_arm_emit_label (compiler, LABEL_REGION1_LOOP);
- orc_neon_emit_loop (compiler);
+ orc_neon_emit_loop (compiler, 0);
orc_arm_emit_sub_imm (compiler, ORC_ARM_IP, ORC_ARM_IP, 1, TRUE);
orc_arm_emit_branch (compiler, ORC_ARM_COND_NE, LABEL_REGION1_LOOP);
orc_arm_emit_label (compiler, LABEL_REGION1_SKIP);
@@ -736,7 +738,7 @@ orc_compiler_neon_assemble (OrcCompiler *compiler)
orc_arm_emit_label (compiler, LABEL_REGION2_LOOP_LARGE);
orc_arm_emit_sub_imm (compiler, ORC_ARM_IP, ORC_ARM_IP, 1, TRUE);
for(i=0;i<(1<<compiler->unroll_shift);i++){
- orc_neon_emit_loop (compiler);
+ orc_neon_emit_loop (compiler, i);
}
orc_arm_emit_branch (compiler, ORC_ARM_COND_NE, LABEL_REGION2_LOOP_LARGE);
orc_arm_emit_branch (compiler, ORC_ARM_COND_AL, LABEL_REGION2_SKIP);
@@ -752,7 +754,7 @@ orc_compiler_neon_assemble (OrcCompiler *compiler)
orc_arm_emit_label (compiler, LABEL_REGION2_LOOP_MEDIUM);
orc_arm_emit_sub_imm (compiler, ORC_ARM_IP, ORC_ARM_IP, 1, TRUE);
for(i=0;i<(1<<compiler->unroll_shift);i++){
- orc_neon_emit_loop (compiler);
+ orc_neon_emit_loop (compiler, i);
}
orc_arm_emit_branch (compiler, ORC_ARM_COND_NE, LABEL_REGION2_LOOP_MEDIUM);
orc_arm_emit_branch (compiler, ORC_ARM_COND_AL, LABEL_REGION2_SKIP);
@@ -763,7 +765,7 @@ orc_compiler_neon_assemble (OrcCompiler *compiler)
orc_arm_emit_label (compiler, LABEL_REGION2_LOOP_SMALL);
orc_arm_emit_sub_imm (compiler, ORC_ARM_IP, ORC_ARM_IP, 1, TRUE);
for(i=0;i<(1<<compiler->unroll_shift);i++){
- orc_neon_emit_loop (compiler);
+ orc_neon_emit_loop (compiler, i);
}
orc_arm_emit_branch (compiler, ORC_ARM_COND_NE, LABEL_REGION2_LOOP_SMALL);
@@ -787,7 +789,7 @@ orc_compiler_neon_assemble (OrcCompiler *compiler)
orc_arm_emit_branch (compiler, ORC_ARM_COND_EQ, LABEL_REGION3_SKIP);
orc_arm_emit_label (compiler, LABEL_REGION3_LOOP);
- orc_neon_emit_loop (compiler);
+ orc_neon_emit_loop (compiler, 0);
orc_arm_emit_sub_imm (compiler, ORC_ARM_IP, ORC_ARM_IP, 1, TRUE);
orc_arm_emit_branch (compiler, ORC_ARM_COND_NE, LABEL_REGION3_LOOP);
orc_arm_emit_label (compiler, LABEL_REGION3_SKIP);
@@ -818,7 +820,7 @@ orc_compiler_neon_assemble (OrcCompiler *compiler)
}
void
-orc_neon_emit_loop (OrcCompiler *compiler)
+orc_neon_emit_loop (OrcCompiler *compiler, int unroll_index)
{
int j;
int k;
@@ -852,7 +854,7 @@ orc_neon_emit_loop (OrcCompiler *compiler)
switch (compiler->vars[insn->src_args[k]].vartype) {
case ORC_VAR_TYPE_SRC:
case ORC_VAR_TYPE_DEST:
- orc_neon_emit_load_src (compiler, &compiler->vars[insn->src_args[k]]);
+ orc_neon_emit_load_src (compiler, &compiler->vars[insn->src_args[k]], unroll_index);
break;
case ORC_VAR_TYPE_CONST:
break;