summary | refs | log | tree | commit | diff
path: root/vp8/decoder/arm/detokenizearm_v6.asm
diff options
context:
space:
mode:
Diffstat (limited to 'vp8/decoder/arm/detokenizearm_v6.asm')
-rw-r--r-- vp8/decoder/arm/detokenizearm_v6.asm 364
1 files changed, 364 insertions, 0 deletions
diff --git a/vp8/decoder/arm/detokenizearm_v6.asm b/vp8/decoder/arm/detokenizearm_v6.asm
new file mode 100644
index 000000000..4d87ee5bd
--- /dev/null
+++ b/vp8/decoder/arm/detokenizearm_v6.asm
@@ -0,0 +1,364 @@
+;
+; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_decode_mb_tokens_v5| ; exported entry point; NOTE(review): the symbol says _v5 while the file path says _v6
+
+ AREA |.text|, CODE, READONLY ; name this block of code
+
+ INCLUDE vpx_asm_offsets.asm ; presumably supplies the detok_*, DETOK_*, bool_decoder_* and tokenextrabits_* offsets used below -- confirm
+; Stack frame slots: byte offsets from sp once the routine has reserved l_stacksize bytes.
+l_qcoeff EQU 0 ; coefficient output pointer for the current block
+l_i EQU 4 ; block index i
+l_type EQU 8 ; current type
+l_stop EQU 12 ; block index that ends the current pass (compared with i)
+l_c EQU 16 ; coefficient position c inside the current block
+l_l_ptr EQU 20 ; address of the left context word for the current block
+l_a_ptr EQU 24 ; address of the above context word for the current block
+l_bc EQU 28 ; pointer loaded from detok_current_bc, used to write decoder state back on exit
+l_coef_ptr EQU 32 ; probability pointer for the current type
+l_stacksize EQU 64 ; bytes reserved for the frame; offsets 36..63 are not referenced by the routine
+
+
+;; constant offsets -- these should be created at build time
+c_onyxblock2left_offset EQU 25 ; byte offset from &ptr_onyxblock2context_leftabove[i] to the left index of block i
+c_onyxblock2above_offset EQU 50 ; byte offset from &ptr_onyxblock2context_leftabove[i] to the above index of block i
+c_entropy_nodes EQU 11 ; stride, in bytes, between consecutive contexts in the probability table
+c_dct_eob_token EQU 11 ; token value that ends a block (the routine tests t == -c_dct_eob_token)
+; vp8_decode_mb_tokens_v5 -- in: r0 = DETOK *detoken, r1 = type. Decodes coefficient tokens one block at a time; returns 0 in r0.
+|vp8_decode_mb_tokens_v5| PROC
+ stmdb sp!, {r4 - r11, lr} ; save callee-saved registers and the return address
+ sub sp, sp, #l_stacksize ; reserve the local frame
+ mov r7, r1 ; r7 = type
+ mov r9, r0 ;DETOK *detoken (r9 is never written again)
+; Register roles from here on: r4 = value, r5 = count, r6 = range, r8 = input byte pointer, r9 = detoken.
+ ldr r1, [r9, #detok_current_bc] ; r1 = bool decoder state
+ ldr r0, [r9, #detok_qcoeff_start_ptr] ; r0 = coefficient output base
+ mov r11, #0 ; i = 0
+ mov r3, #0x10 ; stop = 16
+
+ cmp r7, #1 ; type == 1: start with block 24 only
+ addeq r11, r11, #24 ; i = 24
+ addeq r3, r3, #8 ; stop = 24
+ addeq r0, r0, #3, 24 ; qcoeff += 0x300 (3 rotated right by 24 bits) = 24 blocks * 0x20 bytes
+
+ str r0, [sp, #l_qcoeff]
+ str r11, [sp, #l_i]
+ str r7, [sp, #l_type]
+ str r3, [sp, #l_stop]
+ str r1, [sp, #l_bc]
+
+ add lr, r9, r7, lsl #2 ; lr = detoken + type * 4
+
+ ldr r2, [r1, #bool_decoder_buffer]
+ ldr r3, [r1, #bool_decoder_pos]
+
+ ldr r10, [lr, #detok_coef_probs] ; r10 = word at detok_coef_probs + type * 4 (probability pointer for this type)
+ ldr r5, [r1, #bool_decoder_count]
+ ldr r6, [r1, #bool_decoder_range]
+ ldr r4, [r1, #bool_decoder_value]
+ add r8, r2, r3 ; r8 = buffer + pos
+
+ str r10, [sp, #l_coef_ptr]
+
+; Per-block setup. Expects r7 = type, r10 = probability pointer, r11 = block index i.
+ ;align 4
+BLOCK_LOOP
+ ldr r3, [r9, #detok_ptr_onyxblock2context_leftabove]
+ ldr r2, [r9, #DETOK_A]
+ ldr r1, [r9, #DETOK_L]
+ ldrb r12, [r3, +r11] ; detoken->ptr_onyxblock2context_leftabove[i]
+
+ cmp r7, #0 ; check type
+ moveq r7, #1 ; c = 1 when type == 0
+ movne r7, #0 ; c = 0 otherwise
+
+ ldr r0, [r2, +r12, lsl #2] ; a = A[r12]
+ add r1, r1, r12, lsl #4 ; l = L + r12 * 16 bytes
+ add r3, r3, r11 ; r3 = &ptr_onyxblock2context_leftabove[i]
+
+ ldrb r2, [r3, #c_onyxblock2above_offset] ; above index for block i
+ ldrb r3, [r3, #c_onyxblock2left_offset] ; left index for block i
+ mov lr, #c_entropy_nodes
+;; ;++
+
+ ldr r2, [r0, +r2, lsl #2]! ; a += above index * 4 (writeback); r2 = *a
+ add r3, r1, r3, lsl #2 ; l += left index * 4
+ str r3, [sp, #l_l_ptr]
+ ldr r3, [r3] ; r3 = *l
+
+ cmp r2, #0
+ movne r2, #1
+ cmp r3, #0
+ addne r2, r2, #1 ; r2 = (*a != 0) + (*l != 0)
+
+ str r0, [sp, #l_a_ptr]
+ smlabb r0, r2, lr, r10 ; r0 = probability pointer + context * c_entropy_nodes
+ mov r1, #0 ; t = 0
+ str r7, [sp, #l_c]
+
+ ;align 4
+COEFF_LOOP
+ ldr r3, [r9, #detok_ptr_onyx_coef_bands_x]
+ ldr lr, [r9, #detok_onyx_coef_tree_ptr] ; lr = token tree base
+
+;;the following two lines are used if onyx_coef_bands_x is UINT16
+;; add r3, r3, r7, lsl #1
+;; ldrh r3, [r3]
+
+;;the following line is used if onyx_coef_bands_x is UINT8
+ ldrb r3, [r7, +r3] ; r3 = onyx_coef_bands_x[c]
+
+
+;; ;++
+;; pld [r8]
+ ;++
+ add r0, r0, r3 ; add the band offset to the probability pointer
+
+ ;align 4
+get_token_loop
+ ldrb r2, [r0, +r1, asr #1] ; probability = r0[t >> 1]
+ mov r3, r6, lsl #8
+ sub r3, r3, #256 ;split = 1 + (((range-1) * probability) >> 8)
+ mov r10, #1 ; the "+ 1" accumulated by smlawb
+
+ smlawb r2, r3, r2, r10 ; r2 = split
+ ldrb r12, [r8] ;load cx data byte in stall slot
+ ;++
+
+ subs r3, r4, r2, lsl #24 ;x = value-(split<<24)
+ addhs r1, r1, #1 ;t += 1
+ movhs r4, r3 ;update value
+ subhs r2, r6, r2 ;range = range - split
+ movlo r6, r2 ;range = split
+
+;;; ldrsbhs r1, [r1, +lr]
+ ldrsb r1, [r1, +lr] ; t = tree[t] (signed byte)
+
+
+;; use branch for short pipelines ???
+;; cmp r2, #0x80
+;; bcs |$LN22@decode_mb_to|
+
+ clz r3, r2
+ sub r3, r3, #24 ; shift = clz(range) - 24
+ subs r5, r5, r3 ; count -= shift; flags select the refill below
+ mov r6, r2, lsl r3 ; range <<= shift
+ mov r4, r4, lsl r3 ; value <<= shift
+
+;; use branch for short pipelines ???
+;; bgt |$LN22@decode_mb_to|
+
+ addle r5, r5, #8 ; count <= 0: count += 8
+ rsble r3, r5, #8 ; r3 = 8 - count
+ addle r8, r8, #1 ; consume the byte preloaded into r12
+ orrle r4, r4, r12, lsl r3 ; value |= byte << r3
+
+;;|$LN22@decode_mb_to|
+
+ cmp r1, #0
+ bgt get_token_loop ; t > 0: still inside the tree
+
+ cmn r1, #c_dct_eob_token ;if(t == -DCT_EOB_TOKEN)
+ beq END_OF_BLOCK
+
+ rsb lr, r1, #0 ;v = -t;
+
+ cmp lr, #4 ;if(v > FOUR_TOKEN)
+ ble SKIP_EXTRABITS
+
+ ldr r3, [r9, #detok_teb_base_ptr]
+ mov r11, #1 ; constant 1 for "1 << bits_count"
+ add r7, r3, lr, lsl #4 ; teb_ptr = teb_base_ptr + v * 16 bytes
+
+ ldrsh lr, [r7, #tokenextrabits_min_val];v = teb_ptr->min_val
+ ldrsh r0, [r7, #tokenextrabits_length];bits_count = teb_ptr->Length
+
+extrabits_loop
+ add r3, r0, r7
+
+ ldrb r2, [r3, #4] ; probability byte at teb_ptr + 4 + bits_count (literal offset 4)
+ mov r3, r6, lsl #8
+ sub r3, r3, #256 ;split = 1 + (((range-1) * probability) >> 8)
+ mov r10, #1
+
+ smlawb r2, r3, r2, r10
+ ldrb r12, [r8] ; preload the next input byte
+ ;++
+
+ subs r10, r4, r2, lsl #24 ;x = value-(split<<24)
+ movhs r4, r10 ;update value
+ subhs r2, r6, r2 ;range = range - split
+ addhs lr, lr, r11, lsl r0 ;v += ((UINT16)1<<bits_count)
+ movlo r6, r2 ;range = split
+
+
+;; use branch for short pipelines ???
+;; cmp r2, #0x80
+;; bcs |$LN10@decode_mb_to|
+
+ clz r3, r2
+ sub r3, r3, #24
+ subs r5, r5, r3
+ mov r6, r2, lsl r3 ;range
+ mov r4, r4, lsl r3 ;value
+
+ addle r5, r5, #8
+ addle r8, r8, #1
+ rsble r3, r5, #8
+ orrle r4, r4, r12, lsl r3
+
+;;|$LN10@decode_mb_to|
+ subs r0, r0, #1 ; --bits_count
+ bpl extrabits_loop ; repeat while bits_count >= 0
+
+
+SKIP_EXTRABITS
+ ldr r11, [sp, #l_qcoeff]
+ ldr r0, [sp, #l_coef_ptr]
+
+ cmp r1, #0 ;check for nonzero token
+ beq SKIP_EOB_CHECK ;if t is zero, we will skip the eob table check
+
+ sub r3, r6, #1 ;range - 1
+ ;++
+ mov r3, r3, lsl #7 ; *= onyx_prob_half (128)
+ ;++
+ mov r3, r3, lsr #8
+ add r2, r3, #1 ;split
+
+ subs r3, r4, r2, lsl #24 ;x = value-(split<<24)
+ movhs r4, r3 ;update value
+ subhs r2, r6, r2 ;range = range - split
+ mvnhs r3, lr
+ addhs lr, r3, #1 ;v = (v ^ -1) + 1
+ movlo r6, r2 ;range = split
+
+;; use branch for short pipelines ???
+;; cmp r2, #0x80
+;; bcs |$LN6@decode_mb_to|
+
+ clz r3, r2
+ sub r3, r3, #24
+ subs r5, r5, r3
+ mov r6, r2, lsl r3
+ mov r4, r4, lsl r3
+ ldrleb r2, [r8], #1 ; refill byte is loaded only when needed here, with post-increment
+ addle r5, r5, #8
+ rsble r3, r5, #8
+ orrle r4, r4, r2, lsl r3
+
+;;|$LN6@decode_mb_to|
+ add r0, r0, #c_entropy_nodes ; context 1 after a nonzero token
+
+ cmn r1, #1 ; t < -1 ?
+
+ addlt r0, r0, #c_entropy_nodes ; context 2 when t < -1
+
+ mvn r1, #1 ; r1 = -2, so the add below restarts the tree at t = 0
+
+SKIP_EOB_CHECK
+ ldr r7, [sp, #l_c] ; reload c (r7 may have held teb_ptr)
+ ldr r3, [r9, #detok_scan]
+ add r1, r1, #2 ; t = 0 after a nonzero token, t = 2 after a zero token
+ cmp r7, #(0x10 - 1) ;assume one less for now.... increment below
+
+ ldr r3, [r3, +r7, lsl #2] ; r3 = scan[c]
+ add r7, r7, #1 ; ++c
+ add r3, r11, r3, lsl #1 ; &qcoeff[scan[c]] (16-bit entries)
+
+ str r7, [sp, #l_c]
+ strh lr, [r3] ; store v
+
+ blt COEFF_LOOP ; flags from the cmp above: loop while the old c < 15
+
+ sub r7, r7, #1 ;if(t != -DCT_EOB_TOKEN) --c
+
+END_OF_BLOCK
+ ldr r3, [sp, #l_type]
+ ldr r10, [sp, #l_coef_ptr] ; restore r10 (used as scratch during decoding)
+ ldr r0, [sp, #l_qcoeff]
+ ldr r11, [sp, #l_i] ; restore i (r11 is scratch in the extra-bits path)
+ ldr r12, [sp, #l_stop]
+
+ cmp r3, #0
+ moveq r1, #1
+ movne r1, #0 ; r1 = first coefficient position for this type
+ add r3, r11, r9
+
+ cmp r7, r1 ; c == first position ?
+ strb r7, [r3, #detok_eob] ; byte at detok_eob + i = c
+
+ ldr r7, [sp, #l_l_ptr]
+ ldr r2, [sp, #l_a_ptr]
+ movne r3, #1
+ moveq r3, #0 ; r3 = (c != first position)
+
+ add r0, r0, #0x20 ; advance the output pointer by 0x20 bytes
+ add r11, r11, #1 ; ++i
+ str r3, [r7] ; *l = r3
+ str r3, [r2] ; *a = r3
+ str r0, [sp, #l_qcoeff]
+ str r11, [sp, #l_i]
+
+ cmp r11, r12 ;i >= stop ?
+ ldr r7, [sp, #l_type]
+ mov lr, #c_entropy_nodes ; redundant with the load at BLOCK_LOOP; kept as in the original
+
+ blt BLOCK_LOOP
+
+ cmp r11, #0x19 ; i == 25: block 24 just finished
+ bne ln2_decode_mb_to
+; Restart at block 0 with type 0 and stop = 16.
+ ldr r12, [r9, #detok_qcoeff_start_ptr]
+ ldr r10, [r9, #detok_coef_probs]
+ mov r7, #0
+ mov r3, #0x10
+ str r12, [sp, #l_qcoeff]
+ str r7, [sp, #l_i]
+ str r7, [sp, #l_type]
+ str r3, [sp, #l_stop]
+ mov r11, #0 ; keep the register copy of i in step with l_i; BLOCK_LOOP indexes with r11
+ str r10, [sp, #l_coef_ptr]
+
+ b BLOCK_LOOP
+
+ln2_decode_mb_to
+ cmp r11, #0x10 ; i == 16 ?
+ bne ln1_decode_mb_to
+
+ ldr r10, [r9, #0x30] ; NOTE(review): literal offset, presumably detok_coef_probs + 2 * 4 -- confirm
+
+ mov r7, #2 ; type = 2
+ mov r3, #0x18 ; stop = 24
+
+ str r7, [sp, #l_type]
+ str r3, [sp, #l_stop]
+
+ str r10, [sp, #l_coef_ptr]
+ b BLOCK_LOOP
+
+ln1_decode_mb_to
+ ldr r2, [sp, #l_bc]
+ mov r0, #0 ; return value
+ nop
+
+ ldr r3, [r2, #bool_decoder_buffer]
+ str r5, [r2, #bool_decoder_count]
+ str r4, [r2, #bool_decoder_value]
+ sub r3, r8, r3 ; pos = input pointer - buffer
+ str r3, [r2, #bool_decoder_pos]
+ str r6, [r2, #bool_decoder_range]
+
+ add sp, sp, #l_stacksize
+ ldmia sp!, {r4 - r11, pc} ; restore registers and return
+
+ ENDP ; |vp8_decode_mb_tokens_v5|
+
+ END