diff options
Diffstat (limited to 'vp8/decoder/arm/detokenizearm_v6.asm')
-rw-r--r-- | vp8/decoder/arm/detokenizearm_v6.asm | 364 |
1 files changed, 364 insertions, 0 deletions
diff --git a/vp8/decoder/arm/detokenizearm_v6.asm b/vp8/decoder/arm/detokenizearm_v6.asm new file mode 100644 index 000000000..4d87ee5bd --- /dev/null +++ b/vp8/decoder/arm/detokenizearm_v6.asm @@ -0,0 +1,364 @@ +; +; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license and patent +; grant that can be found in the LICENSE file in the root of the source +; tree. All contributing project authors may be found in the AUTHORS +; file in the root of the source tree. +; + + + EXPORT |vp8_decode_mb_tokens_v5| + + AREA |.text|, CODE, READONLY ; name this block of code + + INCLUDE vpx_asm_offsets.asm + +l_qcoeff EQU 0 +l_i EQU 4 +l_type EQU 8 +l_stop EQU 12 +l_c EQU 16 +l_l_ptr EQU 20 +l_a_ptr EQU 24 +l_bc EQU 28 +l_coef_ptr EQU 32 +l_stacksize EQU 64 + + +;; constant offsets -- these should be created at build time +c_onyxblock2left_offset EQU 25 +c_onyxblock2above_offset EQU 50 +c_entropy_nodes EQU 11 +c_dct_eob_token EQU 11 + +|vp8_decode_mb_tokens_v5| PROC + stmdb sp!, {r4 - r11, lr} + sub sp, sp, #l_stacksize + mov r7, r1 + mov r9, r0 ;DETOK *detoken + + ldr r1, [r9, #detok_current_bc] + ldr r0, [r9, #detok_qcoeff_start_ptr] + mov r11, #0 + mov r3, #0x10 + + cmp r7, #1 + addeq r11, r11, #24 + addeq r3, r3, #8 + addeq r0, r0, #3, 24 + + str r0, [sp, #l_qcoeff] + str r11, [sp, #l_i] + str r7, [sp, #l_type] + str r3, [sp, #l_stop] + str r1, [sp, #l_bc] + + add lr, r9, r7, lsl #2 + + ldr r2, [r1, #bool_decoder_buffer] + ldr r3, [r1, #bool_decoder_pos] + + ldr r10, [lr, #detok_coef_probs] + ldr r5, [r1, #bool_decoder_count] + ldr r6, [r1, #bool_decoder_range] + ldr r4, [r1, #bool_decoder_value] + add r8, r2, r3 + + str r10, [sp, #l_coef_ptr] + + + ;align 4 +BLOCK_LOOP + ldr r3, [r9, #detok_ptr_onyxblock2context_leftabove] + ldr r2, [r9, #DETOK_A] + ldr r1, [r9, #DETOK_L] + ldrb r12, [r3, +r11] ; detoken->ptr_onyxblock2context_leftabove[i] + + cmp r7, #0 ; check type + moveq r7, #1 + movne r7, #0 + + ldr r0, [r2, +r12, lsl #2] ; a + add r1, r1, r12, lsl #4 + add r3, r3, r11 + + ldrb r2, [r3, #c_onyxblock2above_offset] + ldrb r3, [r3, #c_onyxblock2left_offset] + mov lr, #c_entropy_nodes +;; ;++ + + ldr r2, [r0, +r2, lsl #2]! + add r3, r1, r3, lsl #2 + str r3, [sp, #l_l_ptr] + ldr r3, [r3] + + cmp r2, #0 + movne r2, #1 + cmp r3, #0 + addne r2, r2, #1 + + str r0, [sp, #l_a_ptr] + smlabb r0, r2, lr, r10 + mov r1, #0 ; t = 0 + str r7, [sp, #l_c] + + ;align 4 +COEFF_LOOP + ldr r3, [r9, #detok_ptr_onyx_coef_bands_x] + ldr lr, [r9, #detok_onyx_coef_tree_ptr] + +;;the following two lines are used if onyx_coef_bands_x is UINT16 +;; add r3, r3, r7, lsl #1 +;; ldrh r3, [r3] + +;;the following line is used if onyx_coef_bands_x is UINT8 + ldrb r3, [r7, +r3] + + +;; ;++ +;; pld [r8] + ;++ + add r0, r0, r3 + + ;align 4 +get_token_loop + ldrb r2, [r0, +r1, asr #1] + mov r3, r6, lsl #8 + sub r3, r3, #256 ;split = 1 + (((range-1) * probability) >> 8) + mov r10, #1 + + smlawb r2, r3, r2, r10 + ldrb r12, [r8] ;load cx data byte in stall slot + ;++ + + subs r3, r4, r2, lsl #24 ;x = value-(split<<24) + addhs r1, r1, #1 ;t += 1 + movhs r4, r3 ;update value + subhs r2, r6, r2 ;range = range - split + movlo r6, r2 + +;;; ldrsbhs r1, [r1, +lr] + ldrsb r1, [r1, +lr] + + +;; use branch for short pipelines ??? +;; cmp r2, #0x80 +;; bcs |$LN22@decode_mb_to| + + clz r3, r2 + sub r3, r3, #24 + subs r5, r5, r3 + mov r6, r2, lsl r3 + mov r4, r4, lsl r3 + +;; use branch for short pipelines ??? +;; bgt |$LN22@decode_mb_to| + + addle r5, r5, #8 + rsble r3, r5, #8 + addle r8, r8, #1 + orrle r4, r4, r12, lsl r3 + +;;|$LN22@decode_mb_to| + + cmp r1, #0 + bgt get_token_loop + + cmn r1, #c_dct_eob_token ;if(t == -DCT_EOB_TOKEN) + beq END_OF_BLOCK + + rsb lr, r1, #0 ;v = -t; + + cmp lr, #4 ;if(v > FOUR_TOKEN) + ble SKIP_EXTRABITS + + ldr r3, [r9, #detok_teb_base_ptr] + mov r11, #1 + add r7, r3, lr, lsl #4 + + ldrsh lr, [r7, #tokenextrabits_min_val];v = teb_ptr->min_val + ldrsh r0, [r7, #tokenextrabits_length];bits_count = teb_ptr->Length + +extrabits_loop + add r3, r0, r7 + + ldrb r2, [r3, #4] + mov r3, r6, lsl #8 + sub r3, r3, #256 ;split = 1 + (((range-1) * probability) >> 8) + mov r10, #1 + + smlawb r2, r3, r2, r10 + ldrb r12, [r8] + ;++ + + subs r10, r4, r2, lsl #24 ;x = value-(split<<24) + movhs r4, r10 ;update value + subhs r2, r6, r2 ;range = range - split + addhs lr, lr, r11, lsl r0 ;v += ((UINT16)1<<bits_count) + movlo r6, r2 ;range = split + + +;; use branch for short pipelines ??? +;; cmp r2, #0x80 +;; bcs |$LN10@decode_mb_to| + + clz r3, r2 + sub r3, r3, #24 + subs r5, r5, r3 + mov r6, r2, lsl r3 ;range + mov r4, r4, lsl r3 ;value + + addle r5, r5, #8 + addle r8, r8, #1 + rsble r3, r5, #8 + orrle r4, r4, r12, lsl r3 + +;;|$LN10@decode_mb_to| + subs r0, r0, #1 + bpl extrabits_loop + + +SKIP_EXTRABITS + ldr r11, [sp, #l_qcoeff] + ldr r0, [sp, #l_coef_ptr] + + cmp r1, #0 ;check for nonzero token + beq SKIP_EOB_CHECK ;if t is zero, we will skip the eob table chec + + sub r3, r6, #1 ;range - 1 + ;++ + mov r3, r3, lsl #7 ; *= onyx_prob_half (128) + ;++ + mov r3, r3, lsr #8 + add r2, r3, #1 ;split + + subs r3, r4, r2, lsl #24 ;x = value-(split<<24) + movhs r4, r3 ;update value + subhs r2, r6, r2 ;range = range - split + mvnhs r3, lr + addhs lr, r3, #1 ;v = (v ^ -1) + 1 + movlo r6, r2 ;range = split + +;; use branch for short pipelines ??? +;; cmp r2, #0x80 +;; bcs |$LN6@decode_mb_to| + + clz r3, r2 + sub r3, r3, #24 + subs r5, r5, r3 + mov r6, r2, lsl r3 + mov r4, r4, lsl r3 + ldrleb r2, [r8], #1 + addle r5, r5, #8 + rsble r3, r5, #8 + orrle r4, r4, r2, lsl r3 + +;;|$LN6@decode_mb_to| + add r0, r0, #0xB + + cmn r1, #1 + + addlt r0, r0, #0xB + + mvn r1, #1 + +SKIP_EOB_CHECK + ldr r7, [sp, #l_c] + ldr r3, [r9, #detok_scan] + add r1, r1, #2 + cmp r7, #(0x10 - 1) ;assume one less for now.... increment below + + ldr r3, [r3, +r7, lsl #2] + add r7, r7, #1 + add r3, r11, r3, lsl #1 + + str r7, [sp, #l_c] + strh lr, [r3] + + blt COEFF_LOOP + + sub r7, r7, #1 ;if(t != -DCT_EOB_TOKEN) --c + +END_OF_BLOCK + ldr r3, [sp, #l_type] + ldr r10, [sp, #l_coef_ptr] + ldr r0, [sp, #l_qcoeff] + ldr r11, [sp, #l_i] + ldr r12, [sp, #l_stop] + + cmp r3, #0 + moveq r1, #1 + movne r1, #0 + add r3, r11, r9 + + cmp r7, r1 + strb r7, [r3, #detok_eob] + + ldr r7, [sp, #l_l_ptr] + ldr r2, [sp, #l_a_ptr] + movne r3, #1 + moveq r3, #0 + + add r0, r0, #0x20 + add r11, r11, #1 + str r3, [r7] + str r3, [r2] + str r0, [sp, #l_qcoeff] + str r11, [sp, #l_i] + + cmp r11, r12 ;i >= stop ? + ldr r7, [sp, #l_type] + mov lr, #0xB + + blt BLOCK_LOOP + + cmp r11, #0x19 + bne ln2_decode_mb_to + + ldr r12, [r9, #detok_qcoeff_start_ptr] + ldr r10, [r9, #detok_coef_probs] + mov r7, #0 + mov r3, #0x10 + str r12, [sp, #l_qcoeff] + str r7, [sp, #l_i] + str r7, [sp, #l_type] + str r3, [sp, #l_stop] + + str r10, [sp, #l_coef_ptr] + + b BLOCK_LOOP + +ln2_decode_mb_to + cmp r11, #0x10 + bne ln1_decode_mb_to + + ldr r10, [r9, #0x30] + + mov r7, #2 + mov r3, #0x18 + + str r7, [sp, #l_type] + str r3, [sp, #l_stop] + + str r10, [sp, #l_coef_ptr] + b BLOCK_LOOP + +ln1_decode_mb_to + ldr r2, [sp, #l_bc] + mov r0, #0 + nop + + ldr r3, [r2, #bool_decoder_buffer] + str r5, [r2, #bool_decoder_count] + str r4, [r2, #bool_decoder_value] + sub r3, r8, r3 + str r3, [r2, #bool_decoder_pos] + str r6, [r2, #bool_decoder_range] + + add sp, sp, #l_stacksize + ldmia sp!, {r4 - r11, pc} + + ENDP ; |vp8_decode_mb_tokens_v5| + + END |