summaryrefslogtreecommitdiff
path: root/vpx_scale/symbian/gen_scalers_armv4.asm
diff options
context:
space:
mode:
Diffstat (limited to 'vpx_scale/symbian/gen_scalers_armv4.asm')
-rw-r--r--vpx_scale/symbian/gen_scalers_armv4.asm773
1 files changed, 773 insertions, 0 deletions
diff --git a/vpx_scale/symbian/gen_scalers_armv4.asm b/vpx_scale/symbian/gen_scalers_armv4.asm
new file mode 100644
index 000000000..1c904edae
--- /dev/null
+++ b/vpx_scale/symbian/gen_scalers_armv4.asm
@@ -0,0 +1,773 @@
+;
+; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+ ; Entry points made visible to the linker: one horizontal_line / vertical_band
+ ; pair for each scaling ratio implemented in this file.
+ EXPORT |horizontal_line_4_5_scale_armv4|
+ EXPORT |vertical_band_4_5_scale_armv4|
+ EXPORT |horizontal_line_2_3_scale_armv4|
+ EXPORT |vertical_band_2_3_scale_armv4|
+ EXPORT |horizontal_line_3_5_scale_armv4|
+ EXPORT |vertical_band_3_5_scale_armv4|
+ EXPORT |horizontal_line_3_4_scale_armv4|
+ EXPORT |vertical_band_3_4_scale_armv4|
+ EXPORT |horizontal_line_1_2_scale_armv4|
+ EXPORT |vertical_band_1_2_scale_armv4|
+
+ AREA |.text|, CODE, READONLY ; name this block of code
+
+ ; Register aliases used by the routines below.
+src RN r0 ; first argument: source pointer (the vertical_band routines receive dest here)
+srcw RN r1 ; second argument of the horizontal_line routines, used as their loop counter
+dest RN r2 ; third argument of the horizontal_line routines: output pointer
+mask RN r12 ; selection mask, loaded by each routine that needs it
+c51_205 RN r10 ; loaded with 0x3300cd = (51 << 16) | 205
+c102_154 RN r11 ; loaded with 0x66009a = (102 << 16) | 154
+;/****************************************************************************
+; *
+; * ROUTINE : horizontal_line_4_5_scale_armv4
+; *
+; * INPUTS : const unsigned char *source : Pointer to source data.
+; * unsigned int source_width : Stride of source.
+; * unsigned char *dest : Pointer to destination data.
+; * unsigned int dest_width : Stride of destination (NOT USED).
+; *
+; * OUTPUTS : None.
+; *
+; * RETURNS : void
+; *
+; * FUNCTION : Copies horizontal line of pixels from source to
+; * destination scaling up by 4 to 5.
+; *
+; * SPECIAL NOTES : None.
+; *
+; ****************************************************************************/
+;void horizontal_line_4_5_scale_armv4
+;(
+; r0 = UINT8 *source
+; r1 = UINT32 source_width
+; r2 = UINT8 *dest
+; r3 = UINT32 dest_width
+;)
+|horizontal_line_4_5_scale_armv4| PROC
+ ; In: r0 = source, r1 = source_width, r2 = dest.
+ ; r3 (dest_width) is not read and is reused as scratch.
+ ;
+ ; For four source pixels a,b,c,d and the following pixel e the loop emits
+ ; a, (51a+205b+128)>>8, (102b+154c+128)>>8, (154c+102d+128)>>8,
+ ; (205d+51e+128)>>8.
+ ;
+ ; Two pixels are packed as lo | hi << 16 and multiplied by a packed constant
+ ; k_hi << 16 | k_lo. Bits 16..31 of the product then hold lo*k_hi + hi*k_lo
+ ; (the low half-word holds lo*k_lo, which is below 65536 and never carries).
+ ; The rounding term therefore has to be 128 << 16 = 0x800000 before lsr #24.
+ ;
+ ; NOTE(review): the source is fetched one word at a time and bytes are taken
+ ; from the low end first, so this presumably expects a word-aligned,
+ ; little-endian source - confirm against the callers.
+ ; NOTE(review): the loop runs source_width / 4 times and the tail then emits
+ ; one more group from the word that follows - confirm callers rely on this.
+ stmdb sp!, {r4 - r11, lr}
+
+ mov mask, #255 ; mask for selection
+ ldr c51_205, =0x3300cd ; (51 << 16) | 205
+ ldr c102_154, =0x66009a ; (102 << 16) | 154
+
+ ldr r3, [src], #4 ; first four source pixels
+
+hl45_loop
+
+ and r4, r3, mask ; a = src[0]
+ and r5, mask, r3, lsr #8 ; b = src[1]
+ strb r4, [dest], #1 ; des[0] = a
+
+ orr r6, r4, r5, lsl #16 ; b | a
+ and r7, mask, r3, lsr #16 ; c = src[2]
+ mul r6, c51_205, r6 ; a * 51 + 205 * b
+
+ orr r5, r5, r7, lsl #16 ; c | b
+ mul r5, c102_154, r5 ; b * 102 + 154 * c
+ add r6, r6, #0x800000 ; + 128 in the half-word that holds the sum
+ and r8, mask, r3, lsr #24 ; d = src[3]
+ mov r6, r6, lsr #24
+ strb r6, [dest], #1 ; des[1]
+
+ orr r7, r8, r7, lsl #16 ; c | d
+ mul r7, c102_154, r7 ; c * 154 + 102 * d
+ add r5, r5, #0x800000
+ ldr r3, [src], #4 ; next four source pixels
+ mov r5, r5, lsr #24
+ strb r5, [dest], #1 ; des[2]
+
+ add r7, r7, #0x800000
+ and r9, mask, r3 ; e = src[4]
+ orr r9, r9, r8, lsl #16 ; d | e
+ mul r9, c51_205, r9 ; d * 205 + 51 * e
+ mov r7, r7, lsr #24
+ strb r7, [dest], #1 ; des[3]
+
+ add r9, r9, #0x800000
+ subs srcw, srcw, #4
+ mov r9, r9, lsr #24
+ strb r9, [dest], #1 ; des[4]
+
+ bne hl45_loop
+
+ ; Tail: one more group taken from the word already held in r3.
+ ; The byte extraction must shift right, exactly as in the loop; shifting
+ ; left and masking with 255 always produced zero for b, c and d.
+ and r4, r3, mask ; a
+ and r5, mask, r3, lsr #8 ; b
+ strb r4, [dest], #1
+
+ orr r6, r4, r5, lsl #16 ; b | a
+ mul r6, c51_205, r6 ; a * 51 + 205 * b
+
+ and r7, mask, r3, lsr #16 ; c
+ orr r5, r5, r7, lsl #16 ; c | b
+ mul r5, c102_154, r5 ; b * 102 + 154 * c
+ add r6, r6, #0x800000
+ and r8, mask, r3, lsr #24 ; d
+ mov r6, r6, lsr #24
+ strb r6, [dest], #1
+
+ orr r7, r8, r7, lsl #16 ; c | d
+ mul r7, c102_154, r7 ; c * 154 + 102 * d
+ add r5, r5, #0x800000
+ mov r5, r5, lsr #24
+ strb r5, [dest], #1
+
+ add r7, r7, #0x800000
+ mov r7, r7, lsr #24
+ strb r7, [dest], #1
+
+ ; NOTE(review): the fifth tail output is the byte that follows d in
+ ; memory, copied unfiltered - confirm this is the intended edge handling.
+ ldrb r3, [src]
+ strb r3, [dest], #1
+
+ ldmia sp!, {r4 - r11, pc}
+ ENDP ;|horizontal_line_4_5_scale_armv4|
+
+;/****************************************************************************
+; *
+; * ROUTINE : vertical_band_4_5_scale_armv4
+; *
+; * INPUTS : unsigned char *dest : Pointer to destination data.
+; * unsigned int dest_pitch : Stride of destination data.
+; * unsigned int dest_width : Width of destination data.
+; *
+; * OUTPUTS : None.
+; *
+; * RETURNS : void
+; *
+; * FUNCTION : Scales vertical band of pixels by scale 4 to 5. The
+; * height of the band scaled is 4-pixels.
+; *
+; * SPECIAL NOTES : The routine uses the first line of the band below
+; * the current band.
+; *
+; ****************************************************************************/
+;void vertical_band_4_5_scale_armv4
+;(
+; r0 = UINT8 *dest
+; r1 = UINT32 dest_pitch
+; r2 = UINT32 dest_width
+;)
+|vertical_band_4_5_scale_armv4| PROC
+ ; In: r0 = dest, r1 = dest_pitch, r2 = dest_width (column count).
+ ; Works in place, one column per pass: rows 0..3 and row 5 are read,
+ ; rows 1..4 are rewritten with
+ ; (51a+205b+128)>>8, (102b+154c+128)>>8, (154c+102d+128)>>8,
+ ; (205d+51e+128)>>8 where a..d are rows 0..3 and e is row 5.
+ ;
+ ; Two pixels are packed as lo | hi << 16 and multiplied by a packed constant
+ ; k_hi << 16 | k_lo, so bits 16..31 of the product hold lo*k_hi + hi*k_lo.
+ ; Rounding therefore adds 128 << 16 = 0x800000 before the lsr #24.
+ stmdb sp!, {r4 - r11, lr}
+
+ ldr c51_205, =0x3300cd ; (51 << 16) | 205
+ ldr c102_154, =0x66009a ; (102 << 16) | 154
+
+vl45_loop
+ mov r3, src ; r3 walks down the column for the reads
+ ldrb r4, [r3], r1 ; a = des [0]
+ ldrb r5, [r3], r1 ; b = des [dest_pitch]
+ ldrb r7, [r3], r1 ; c = des[dest_pitch*2]
+ add lr, src, r1 ; lr walks down the column for the writes
+
+ orr r6, r4, r5, lsl #16 ; b | a
+ mul r6, c51_205, r6 ; a * 51 + 205 * b
+
+ ldrb r8, [r3], r1 ; d = des[dest_pitch*3]
+ orr r5, r5, r7, lsl #16 ; c | b
+ mul r5, c102_154, r5 ; b * 102 + 154 * c
+ add r6, r6, #0x800000 ; + 128 in the half-word that holds the sum
+ orr r7, r8, r7, lsl #16 ; c | d
+ mov r6, r6, lsr #24
+ strb r6, [lr], r1 ; des[dest_pitch]
+
+ ldrb r9, [r3, r1] ; e = des [dest_pitch * 5]
+ mul r7, c102_154, r7 ; c * 154 + 102 * d
+ add r5, r5, #0x800000
+ orr r9, r9, r8, lsl #16 ; d | e
+ mov r5, r5, lsr #24
+ strb r5, [lr], r1 ; des[dest_pitch*2]
+
+ mul r9, c51_205, r9 ; d * 205 + 51 * e
+ add r7, r7, #0x800000
+ add src, src, #1 ; next column
+ mov r7, r7, lsr #24
+ strb r7, [lr], r1 ; des[dest_pitch*3]
+
+ add r9, r9, #0x800000
+ subs r2, r2, #1
+ mov r9, r9, lsr #24
+ strb r9, [lr], r1 ; des[dest_pitch*4]
+
+ bne vl45_loop
+
+ ldmia sp!, {r4 - r11, pc}
+ ENDP ;|vertical_band_4_5_scale_armv4|
+
+;/****************************************************************************
+; *
+; * ROUTINE : horizontal_line_2_3_scale_armv4
+; *
+; * INPUTS : const unsigned char *source : Pointer to source data.
+; * unsigned int source_width : Stride of source.
+; * unsigned char *dest : Pointer to destination data.
+; * unsigned int dest_width : Stride of destination (NOT USED).
+; *
+; * OUTPUTS : None.
+; *
+; * RETURNS : void
+; *
+; * FUNCTION : Copies horizontal line of pixels from source to
+; * destination scaling up by 2 to 3.
+; *
+; * SPECIAL NOTES : None.
+; *
+; *
+; ****************************************************************************/
+;void horizontal_line_2_3_scale_armv4
+;(
+; const unsigned char *source,
+; unsigned int source_width,
+; unsigned char *dest,
+; unsigned int dest_width
+;)
+|horizontal_line_2_3_scale_armv4| PROC
+ ; In: r0 = source, r1 = source_width, r2 = dest.
+ ; r3 (dest_width) is not read and is reused as scratch.
+ ; Each pass reads pixels a, b and the following pixel c and writes
+ ; a, (85a + 171b + 128) >> 8, (171b + 85c + 128) >> 8.
+ ; NOTE(review): the loop runs source_width / 2 times and the tail then emits
+ ; one more group of three - confirm callers expect that.
+ stmdb sp!, {r4 - r11, lr}
+ ldr lr, =85 ; weight of the outer pixel
+ ldr r12, =171 ; weight of the shared middle pixel
+
+hl23_loop
+
+ ldrb r3, [src], #1 ; a
+ ldrb r4, [src], #1 ; b
+ ldrb r5, [src] ; c (not consumed: it is the next pass's a)
+
+ strb r3, [dest], #1 ; des[0] = a
+ mul r4, r12, r4 ; b * 171
+ mla r6, lr, r3, r4 ; a * 85 + b * 171
+ mla r7, lr, r5, r4 ; c * 85 + b * 171
+
+ add r6, r6, #128 ; round
+ mov r6, r6, lsr #8
+ strb r6, [dest], #1 ; des[1]
+
+ add r7, r7, #128 ; round
+ mov r7, r7, lsr #8
+ strb r7, [dest], #1 ; des[2]
+
+ subs srcw, srcw, #2
+ bne hl23_loop
+
+ ; Tail: r5 still holds the pixel at [src]; it becomes a of the last group.
+ ldrb r4, [src, #1] ; b
+ strb r5, [dest], #1 ; des[0] = a
+ strb r4, [dest, #1] ; des[2] = b, copied unfiltered
+
+ mul r4, r12, r4 ; b * 171
+ mla r6, lr, r5, r4 ; a * 85 + b *171
+
+ add r6, r6, #128 ; round
+ mov r6, r6, lsr #8
+ strb r6, [dest] ; des[1]
+
+ ldmia sp!, {r4 - r11, pc}
+ ENDP ;|horizontal_line_2_3_scale_armv4|
+
+;/****************************************************************************
+; *
+; * ROUTINE : vertical_band_2_3_scale_armv4
+; *
+; * INPUTS : unsigned char *dest : Pointer to destination data.
+; * unsigned int dest_pitch : Stride of destination data.
+; * unsigned int dest_width : Width of destination data.
+; *
+; * OUTPUTS : None.
+; *
+; * RETURNS : void
+; *
+; * FUNCTION : Scales vertical band of pixels by scale 2 to 3. The
+; * height of the band scaled is 2-pixels.
+; *
+; * SPECIAL NOTES : The routine uses the first line of the band below
+; * the current band.
+; *
+; ****************************************************************************/
+;void vertical_band_2_3_scale_armv4
+;(
+; r0 = UINT8 *dest
+; r1 = UINT32 dest_pitch
+; r2 = UINT32 dest_width
+;)
+|vertical_band_2_3_scale_armv4| PROC
+ ; In: r0 = dest, r1 = dest_pitch, r2 = dest_width (column count).
+ ; Works in place, one column per pass: rows 0, 1 and 3 are read and
+ ; rows 1 and 2 are rewritten with
+ ; (85a + 171b + 128) >> 8 and (171b + 85c + 128) >> 8.
+ stmdb sp!, {r4 - r8, lr}
+ mov lr, #85 ; weight of the outer rows
+ mov r12, #171 ; weight of the shared middle row
+ add r3, r1, r1, lsl #1 ; 3 * dest_pitch
+
+vb23_next_column
+ ldrb r5, [src, r1] ; b = des [dest_pitch]
+ ldrb r4, [src] ; a = des [0]
+ ldrb r7, [src, r3] ; c = des [dest_pitch*3]
+
+ mul r5, r12, r5 ; b * 171
+ mla r6, lr, r4, r5 ; a * 85 + b * 171
+ mla r8, lr, r7, r5 ; c * 85 + b * 171
+
+ add r6, r6, #128 ; round both sums
+ add r8, r8, #128
+ mov r6, r6, lsr #8
+ mov r8, r8, lsr #8
+
+ strb r6, [src, r1] ; des [dest_pitch]
+ strb r8, [src, r1, lsl #1] ; des [dest_pitch*2]
+
+ add src, src, #1 ; next column
+ subs r2, r2, #1
+ bne vb23_next_column
+
+ ldmia sp!, {r4 - r8, pc}
+ ENDP ;|vertical_band_2_3_scale_armv4|
+
+;/****************************************************************************
+; *
+; * ROUTINE : horizontal_line_3_5_scale_armv4
+; *
+; * INPUTS : const unsigned char *source : Pointer to source data.
+; * unsigned int source_width : Stride of source.
+; * unsigned char *dest : Pointer to destination data.
+; * unsigned int dest_width : Stride of destination (NOT USED).
+; *
+; * OUTPUTS : None.
+; *
+; * RETURNS : void
+; *
+; * FUNCTION : Copies horizontal line of pixels from source to
+; * destination scaling up by 3 to 5.
+; *
+; * SPECIAL NOTES : None.
+; *
+; *
+; ****************************************************************************/
+;void horizontal_line_3_5_scale_armv4
+;(
+; const unsigned char *source,
+; unsigned int source_width,
+; unsigned char *dest,
+; unsigned int dest_width
+;)
+|horizontal_line_3_5_scale_armv4| PROC
+ ; In: r0 = source, r1 = source_width, r2 = dest; r3 (dest_width) is not used.
+ ;
+ ; For three source pixels a,b,c and the following pixel d the loop emits
+ ; a, (102a+154b+128)>>8, (205b+51c+128)>>8, (51b+205c+128)>>8,
+ ; (154c+102d+128)>>8.
+ ;
+ ; Two pixels are packed as lo | hi << 16 and multiplied by a packed constant
+ ; k_hi << 16 | k_lo, so bits 16..31 of the product hold lo*k_hi + hi*k_lo.
+ ; Rounding therefore adds 128 << 16 = 0x800000 before the lsr #24.
+ ;
+ ; NOTE(review): the loop repeats while the counter is still non-negative
+ ; after subtracting 3, and the tail then emits one more group - confirm the
+ ; resulting group count is what callers expect.
+ stmdb sp!, {r4 - r11, lr}
+
+ ldr c51_205, =0x3300cd ; (51 << 16) | 205
+ ldr c102_154, =0x66009a ; (102 << 16) | 154
+
+ ldrb r4, [src], #1 ; a = src[0]
+
+hl35_loop
+
+ ldrb r8, [src], #1 ; b = src[1]
+ strb r4, [dest], #1 ; des[0] = a
+
+ orr r6, r4, r8, lsl #16 ; b | a
+ ldrb r9, [src], #1 ; c = src[2]
+ mul r6, c102_154, r6 ; a * 102 + 154 * b
+
+ orr r5, r9, r8, lsl #16 ; b | c
+ mul r5, c51_205, r5 ; b * 205 + 51 * c
+ add r6, r6, #0x800000 ; + 128 in the half-word that holds the sum
+ ldrb r4, [src], #1 ; d = src[3], the next group's a
+ mov r6, r6, lsr #24
+ strb r6, [dest], #1 ; des[1]
+
+ orr r7, r8, r9, lsl #16 ; c | b
+ mul r7, c51_205, r7 ; b * 51 + 205 * c
+ add r5, r5, #0x800000
+ mov r5, r5, lsr #24
+ strb r5, [dest], #1 ; des[2]
+
+ orr r9, r4, r9, lsl #16 ; c | d
+ mul r9, c102_154, r9 ; c * 154 + 102 * d
+ add r7, r7, #0x800000
+ mov r7, r7, lsr #24
+ strb r7, [dest], #1 ; des[3]
+
+ add r9, r9, #0x800000
+ subs srcw, srcw, #3
+ mov r9, r9, lsr #24
+ strb r9, [dest], #1 ; des[4]
+
+ bpl hl35_loop
+
+ ; Tail: r4 already holds a. b must land in r8 because every use below
+ ; reads r8; loading it into r5 left the previous group's b in play.
+ ldrb r8, [src], #1 ; b = src[1]
+ strb r4, [dest], #1 ; des[0] = a
+
+ orr r6, r4, r8, lsl #16 ; b | a
+ ldrb r9, [src], #1 ; c = src[2]
+ mul r6, c102_154, r6 ; a * 102 + 154 * b
+
+ orr r5, r9, r8, lsl #16 ; b | c
+ mul r5, c51_205, r5 ; b * 205 + 51 * c
+ add r6, r6, #0x800000
+ mov r6, r6, lsr #24
+ strb r6, [dest], #1 ; des[1]
+
+ orr r7, r8, r9, lsl #16 ; c | b
+ mul r7, c51_205, r7 ; b * 51 + 205 * c
+ add r5, r5, #0x800000
+ mov r5, r5, lsr #24
+ strb r5, [dest], #1 ; des[2]
+
+ add r7, r7, #0x800000
+ mov r7, r7, lsr #24
+ strb r7, [dest], #1 ; des[3]
+ strb r9, [dest], #1 ; des[4] = c, copied unfiltered
+
+ ldmia sp!, {r4 - r11, pc}
+ ENDP ;|horizontal_line_3_5_scale_armv4|
+
+
+;/****************************************************************************
+; *
+; * ROUTINE : vertical_band_3_5_scale_armv4
+; *
+; * INPUTS : unsigned char *dest : Pointer to destination data.
+; * unsigned int dest_pitch : Stride of destination data.
+; * unsigned int dest_width : Width of destination data.
+; *
+; * OUTPUTS : None.
+; *
+; * RETURNS : void
+; *
+; * FUNCTION : Scales vertical band of pixels by scale 3 to 5. The
+; * height of the band scaled is 3-pixels.
+; *
+; * SPECIAL NOTES : The routine uses the first line of the band below
+; * the current band.
+; *
+; ****************************************************************************/
+;void vertical_band_3_5_scale_armv4
+;(
+; r0 = UINT8 *dest
+; r1 = UINT32 dest_pitch
+; r2 = UINT32 dest_width
+;)
+|vertical_band_3_5_scale_armv4| PROC
+ ; In: r0 = dest, r1 = dest_pitch, r2 = dest_width (column count).
+ ; Works in place, one column per pass: rows 0..2 and row 5 are read,
+ ; rows 1..4 are rewritten with
+ ; (102a+154b+128)>>8, (205b+51c+128)>>8, (51b+205c+128)>>8,
+ ; (154c+102d+128)>>8 where a..c are rows 0..2 and d is row 5.
+ ;
+ ; Two pixels are packed as lo | hi << 16 and multiplied by a packed constant
+ ; k_hi << 16 | k_lo, so bits 16..31 of the product hold lo*k_hi + hi*k_lo.
+ ; Rounding therefore adds 128 << 16 = 0x800000 before the lsr #24.
+ stmdb sp!, {r4 - r11, lr}
+
+ ldr c51_205, =0x3300cd ; (51 << 16) | 205
+ ldr c102_154, =0x66009a ; (102 << 16) | 154
+
+vl35_loop
+ mov r3, src ; r3 walks down the column for the reads
+ ldrb r4, [r3], r1 ; a = des [0]
+ ldrb r5, [r3], r1 ; b = des [dest_pitch]
+ ldrb r7, [r3], r1 ; c = des[dest_pitch*2]
+ add lr, src, r1 ; lr walks down the column for the writes
+
+ orr r8, r4, r5, lsl #16 ; b | a
+ mul r6, c102_154, r8 ; a * 102 + 154 * b
+
+ ldrb r8, [r3, r1, lsl #1] ; d = des[dest_pitch*5]
+ orr r3, r7, r5, lsl #16 ; b | c (r3 is free once d is loaded)
+ mul r9, c51_205, r3 ; b * 205 + 51 * c
+ add r6, r6, #0x800000 ; + 128 in the half-word that holds the sum
+ orr r3, r5, r7, lsl #16 ; c | b
+ mov r6, r6, lsr #24
+ strb r6, [lr], r1 ; des[dest_pitch]
+
+ mul r5, c51_205, r3 ; b * 51 + 205 * c
+ add r9, r9, #0x800000
+ orr r3, r8, r7, lsl #16 ; c | d
+ mov r9, r9, lsr #24
+ strb r9, [lr], r1 ; des[dest_pitch*2]
+
+ mul r7, c102_154, r3 ; c * 154 + 102 * d
+ add r5, r5, #0x800000
+ add src, src, #1 ; next column
+ mov r5, r5, lsr #24
+ strb r5, [lr], r1 ; des[dest_pitch*3]
+
+ add r7, r7, #0x800000
+ subs r2, r2, #1
+ mov r7, r7, lsr #24
+ strb r7, [lr], r1 ; des[dest_pitch*4]
+
+
+ bne vl35_loop
+
+ ldmia sp!, {r4 - r11, pc}
+ ENDP ;|vertical_band_3_5_scale_armv4|
+
+;/****************************************************************************
+; *
+; * ROUTINE : horizontal_line_3_4_scale_armv4
+; *
+; * INPUTS : const unsigned char *source : Pointer to source data.
+; * unsigned int source_width : Stride of source.
+; * unsigned char *dest : Pointer to destination data.
+; * unsigned int dest_width : Stride of destination (NOT USED).
+; *
+; * OUTPUTS : None.
+; *
+; * RETURNS : void
+; *
+; * FUNCTION : Copies horizontal line of pixels from source to
+; * destination scaling up by 3 to 4.
+; *
+; * SPECIAL NOTES : None.
+; *
+; *
+; ****************************************************************************/
+;void horizontal_line_3_4_scale_armv4
+;(
+; const unsigned char *source,
+; unsigned int source_width,
+; unsigned char *dest,
+; unsigned int dest_width
+;)
+|horizontal_line_3_4_scale_armv4| PROC
+ ; In: r0 = source, r1 = source_width, r2 = dest; r3 (dest_width) is not used.
+ ; For three source pixels a,b,c and the following pixel a' the loop emits
+ ; a, (64a + 192b + 128) >> 8, (b + c + 1) >> 1, (192c + 64a' + 128) >> 8.
+ ; NOTE(review): the loop repeats while the counter is still non-negative
+ ; after subtracting 3, and the tail then emits one more group - confirm the
+ ; resulting group count is what callers expect.
+ stmdb sp!, {r4 - r11, lr}
+
+ ldr r10, =64 ; weight of the far pixel
+ ldr r11, =192 ; weight of the near pixel
+ mov r9, #128 ; rounding term, used as the mla accumulator seed
+
+ ldrb r4, [src], #1 ; a = src[0]
+
+hl34_loop
+
+ ldrb r8, [src], #1 ; b = src[1]
+ ldrb r7, [src], #1 ; c = src[2]
+ strb r4, [dest], #1 ; des[0] = a
+
+ mla r4, r10, r4, r9 ; a*64 + 128
+ mla r4, r11, r8, r4 ; a*64 + b*192 + 128
+
+ add r8, r8, #1 ; b + 1
+ add r8, r8, r7 ; b + c + 1
+ mov r8, r8, asr #1 ; (b + c + 1) >> 1
+
+ mov r4, r4, asr #8 ; (a*64 + b*192 + 128) >> 8
+ strb r4, [dest], #1 ; des[1]
+
+ strb r8, [dest], #1 ; des[2]
+
+ ldrb r4, [src], #1 ; a' = src[3], the next group's a
+
+ mla r7, r11, r7, r9 ; c*192 + 128
+ mla r7, r4, r10, r7 ; c*192 + a'*64 + 128
+
+ subs srcw, srcw, #3
+
+ mov r7, r7, asr #8 ; (c*192 + a'*64 + 128) >> 8
+ strb r7, [dest], #1 ; des[3]
+
+ bpl hl34_loop
+
+ ; Tail: r4 already holds a; the last output is c copied unfiltered.
+ ldrb r8, [src], #1 ; b = src[1]
+ ldrb r7, [src], #1 ; c = src[2]
+ strb r4, [dest], #1 ; des[0] = a
+
+ mla r4, r10, r4, r9 ; a*64 + 128
+ mla r4, r11, r8, r4 ; a*64 + b*192 + 128
+ mov r4, r4, asr #8 ; (a*64 + b*192 + 128) >> 8
+ strb r4, [dest], #1 ; des[1]
+
+ add r8, r8, #1 ; b + 1
+ add r8, r8, r7 ; b + c + 1
+ mov r8, r8, asr #1 ; (b + c + 1) >> 1
+ strb r8, [dest], #1 ; des[2]
+ strb r7, [dest], #1 ; des[3] = c
+
+ ldmia sp!, {r4 - r11, pc}
+ ENDP ;|horizontal_line_3_4_scale_armv4|
+
+
+;/****************************************************************************
+; *
+; * ROUTINE : vertical_band_3_4_scale_armv4
+; *
+; * INPUTS : unsigned char *dest : Pointer to destination data.
+; * unsigned int dest_pitch : Stride of destination data.
+; * unsigned int dest_width : Width of destination data.
+; *
+; * OUTPUTS : None.
+; *
+; * RETURNS : void
+; *
+; * FUNCTION : Scales vertical band of pixels by scale 3 to 4. The
+; * height of the band scaled is 3-pixels.
+; *
+; * SPECIAL NOTES : The routine uses the first line of the band below
+; * the current band.
+; *
+; ****************************************************************************/
+;void vertical_band_3_4_scale_armv4
+;(
+; r0 = UINT8 *dest
+; r1 = UINT32 dest_pitch
+; r2 = UINT32 dest_width
+;)
+|vertical_band_3_4_scale_armv4| PROC
+ ; In: r0 = dest, r1 = dest_pitch, r2 = dest_width (column count).
+ ; Works in place, one column per pass: rows 0..2 and row 4 are read,
+ ; rows 1..3 are rewritten with
+ ; (64a + 192b + 128) >> 8, (b + c + 1) >> 1, (192c + 64a' + 128) >> 8
+ ; where a..c are rows 0..2 and a' is row 4.
+ stmdb sp!, {r4 - r11, lr}
+
+ ldr r10, =64 ; weight of the far row
+ ldr r11, =192 ; weight of the near row
+ mov r9, #128 ; rounding term, used as the mla accumulator seed
+
+; ldr r1,[r1]
+vl34_loop
+ mov r3, src ; r3 walks down the column for the reads
+ ldrb r4, [r3], r1 ; a = des [0]
+ ldrb r5, [r3], r1 ; b = des [dest_pitch]
+ ldrb r7, [r3], r1 ; c = des [dest_pitch*2]
+ add lr, src, r1 ; lr walks down the column for the writes
+
+ mla r4, r10, r4, r9 ; a*64 + 128
+ mla r4, r11, r5, r4 ; a*64 + b*192 + 128
+
+ add r5, r5, #1 ; b + 1
+ add r5, r5, r7 ; b + c + 1
+ mov r5, r5, asr #1 ; (b + c + 1) >> 1
+
+ mov r4, r4, asr #8 ; (a*64 + b*192 + 128) >> 8
+ strb r4, [lr], r1 ; des [dest_pitch]
+
+ ldrb r4, [r3, r1] ; a' = des [dest_pitch*4]
+
+ strb r5, [lr], r1 ; des [dest_pitch*2]
+
+ mla r7, r11, r7, r9 ; c*192 + 128
+ mla r7, r4, r10, r7 ; c*192 + a'*64 + 128
+ mov r7, r7, asr #8 ; (c*192 + a'*64 + 128) >> 8
+
+ add src, src, #1 ; next column
+ subs r2, r2, #1
+
+ strb r7, [lr] ; des [dest_pitch*3]; strb leaves the flags from subs intact
+
+ bne vl34_loop
+
+ ldmia sp!, {r4 - r11, pc}
+ ENDP ;|vertical_band_3_4_scale_armv4|
+
+;/****************************************************************************
+; *
+; * ROUTINE : horizontal_line_1_2_scale_armv4
+; *
+; * INPUTS : const unsigned char *source : Pointer to source data.
+; * unsigned int source_width : Stride of source.
+; * unsigned char *dest : Pointer to destination data.
+; * unsigned int dest_width : Stride of destination (NOT USED).
+; *
+; * OUTPUTS : None.
+; *
+; * RETURNS : void
+; *
+; * FUNCTION : Copies horizontal line of pixels from source to
+; * destination scaling up by 1 to 2.
+; *
+; * SPECIAL NOTES : None.
+; *
+; ****************************************************************************/
+;void horizontal_line_1_2_scale_armv4
+;(
+; const unsigned char *source,
+; unsigned int source_width,
+; unsigned char *dest,
+; unsigned int dest_width
+;)
+|horizontal_line_1_2_scale_armv4| PROC
+ ; In: r0 = source, r1 = source_width, r2 = dest.
+ ; r3 (dest_width) is not read and is reused as scratch.
+ ; Each pass writes a pixel and the rounded average of it and its right
+ ; neighbour; the tail writes the last pixel twice.
+ ; NOTE(review): outputs are stored in pairs with strh, which presumably
+ ; expects a halfword-aligned dest and little-endian byte order - confirm.
+ ; NOTE(review): two source bytes are loaded before the counter is tested, and
+ ; a source_width of 1 leaves the counter at 0 on loop entry - confirm callers
+ ; never pass widths below 2.
+ stmdb sp!, {r4 - r5, lr}
+
+ sub srcw, srcw, #1 ; the last pixel is handled by the tail
+
+ ldrb r3, [src], #1 ; a = src[0]
+ ldrb r4, [src], #1 ; b = src[1]
+hl12_loop
+ subs srcw, srcw, #1 ; flags are consumed by ldrneb / bne below
+
+ add r5, r3, r4 ; a + b
+ add r5, r5, #1 ; a + b + 1
+ mov r5, r5, lsr #1 ; (a + b + 1) >> 1
+
+ orr r5, r3, r5, lsl #8 ; a in the low byte, average in the high byte
+ strh r5, [dest], #2 ; write both outputs at once
+
+ mov r3, r4 ; b becomes the next a
+
+ ldrneb r4, [src], #1 ; fetch the next b only if another pass follows
+ bne hl12_loop
+
+ orr r5, r4, r4, lsl #8 ; last pixel duplicated into both bytes
+ strh r5, [dest]
+
+ ldmia sp!, {r4 - r5, pc}
+ ENDP ;|horizontal_line_1_2_scale_armv4|
+
+;/****************************************************************************
+; *
+; * ROUTINE : vertical_band_1_2_scale_armv4
+; *
+; * INPUTS : unsigned char *dest : Pointer to destination data.
+; * unsigned int dest_pitch : Stride of destination data.
+; * unsigned int dest_width : Width of destination data.
+; *
+; * OUTPUTS : None.
+; *
+; * RETURNS : void
+; *
+; * FUNCTION : Scales vertical band of pixels by scale 1 to 2. The
+; * height of the band scaled is 1-pixel.
+; *
+; * SPECIAL NOTES : The routine uses the first line of the band below
+; * the current band.
+; *
+; ****************************************************************************/
+;void vertical_band_1_2_scale_armv4
+;(
+; r0 = UINT8 *dest
+; r1 = UINT32 dest_pitch
+; r2 = UINT32 dest_width
+;)
+|vertical_band_1_2_scale_armv4| PROC
+ ; In: r0 = dest, r1 = dest_pitch, r2 = dest_width.
+ ; Works in place, four columns per pass: the row at dest_pitch is rewritten
+ ; with (above + below + 1) >> 1, where above is row 0 and below is the row
+ ; at 2 * dest_pitch.
+ ;
+ ; The four bytes of each word are split into even and odd bytes held in two
+ ; 16-bit lanes, so each 9-bit sum fits its lane; the mask applied after the
+ ; shift discards the bit that crosses from the upper lane into the lower one.
+ ;
+ ; The loop runs ceil(dest_width / 4) times. Branching on "plus" instead of
+ ; "greater than" ran one extra pass whenever dest_width was a multiple of 4.
+ ;
+ ; NOTE(review): rows are accessed with ldr/str, so dest and dest_pitch are
+ ; presumably word-aligned - confirm against the callers.
+ stmdb sp!, {r4 - r7, lr}
+
+ ldr mask, =0xff00ff ; mask for selection
+ ldr lr, = 0x010001 ; + 1 in each 16-bit lane
+
+vl12_loop
+ mov r3, src
+ ldr r4, [r3], r1 ; four pixels of row 0; r3 now points at the middle row
+ ldr r5, [r3, r1] ; four pixels of the row at 2 * dest_pitch
+
+ add src, src, #4 ; next four columns
+ subs r2, r2, #4 ; flags are consumed by bgt below
+
+ and r6, r4, mask ; even bytes of the upper row
+ and r7, r5, mask ; even bytes of the lower row
+
+ add r6, r7, r6
+ add r6, r6, lr ; sum + 1 per lane
+
+ and r4, mask, r4, lsr #8 ; odd bytes of the upper row
+ and r5, mask, r5, lsr #8 ; odd bytes of the lower row
+
+ mov r6, r6, lsr #1
+ and r6, r6, mask ; averaged even bytes
+
+ add r4, r5, r4
+ add r4, r4, lr ; sum + 1 per lane
+
+ mov r4, r4, lsr #1
+ and r4, r4, mask ; averaged odd bytes
+
+ orr r5, r6, r4, lsl #8 ; interleave even and odd results
+
+ str r5, [r3] ; middle row
+
+ bgt vl12_loop ; continue only while columns remain
+
+ ldmia sp!, {r4 - r7, pc}
+ ENDP ;|vertical_band_1_2_scale_armv4|
+
+ END