diff options
author | Robin Watts <robin@xiph.org> | 2010-05-17 20:37:59 +0000 |
---|---|---|
committer | Robin Watts <robin@xiph.org> | 2010-05-17 20:37:59 +0000 |
commit | d4265438d16fd5dcb4df5341a86b215a2101c8e3 (patch) | |
tree | 616ec775dcf50949574e9c0d804f001141736103 | |
parent | 77c9cbe128eebd006cb6bc0e9930833f9564eaf0 (diff) | |
download | tremor-d4265438d16fd5dcb4df5341a86b215a2101c8e3.tar.gz |
Initial commit of merged bitwise functions from Tremolo.
For now arm configuration -> _ARM_ASSEM_ -> little endian ARM architecture.
git-svn-id: https://svn.xiph.org/branches/lowmem-branch/Tremolo@17227 0101bb08-14d6-0310-b084-bc0e0c8e3800
-rw-r--r-- | Makefile.am | 3 | ||||
-rw-r--r-- | bitwise.c | 4 | ||||
-rw-r--r-- | bitwiseARM.s | 368 | ||||
-rw-r--r-- | ogg.h | 7 |
4 files changed, 380 insertions, 2 deletions
diff --git a/Makefile.am b/Makefile.am index 830ff77..b58fb74 100644 --- a/Makefile.am +++ b/Makefile.am @@ -10,7 +10,8 @@ lib_LTLIBRARIES = libvorbisidec.la if ARM_TARGET # Build both low and full accuracy versions and the linker will only # include the appropriate versions. -TARGET_SPECIFIC_SOURCES = mdctARM.s mdctLARM.s floor1ARM.s floor1LARM.s +TARGET_SPECIFIC_SOURCES = mdctARM.s mdctLARM.s floor1ARM.s floor1LARM.s \ + bitwiseARM.s else TARGET_SPECIFIC_SOURCES = endif @@ -23,6 +23,7 @@ #include "misc.h" #include "ogg.h" +#ifndef _ARM_ASSEM_ static unsigned long mask[]= {0x00000000,0x00000001,0x00000003,0x00000007,0x0000000f, 0x0000001f,0x0000003f,0x0000007f,0x000000ff,0x000001ff, @@ -146,18 +147,21 @@ void oggpack_adv(oggpack_buffer *b,int bits){ b->headptr+=(bits>>3); if(b->headend<1)_span(b); } +#endif int oggpack_eop(oggpack_buffer *b){ if(b->headend<0)return -1; return 0; } +#ifdef _ARM_ASSEM_ /* bits <= 32 */ long oggpack_read(oggpack_buffer *b,int bits){ long ret=oggpack_look(b,bits); oggpack_adv(b,bits); return(ret); } +#endif long oggpack_bytes(oggpack_buffer *b){ if(b->headend<0)return b->count+b->head->length; diff --git a/bitwiseARM.s b/bitwiseARM.s new file mode 100644 index 0000000..421b4b2 --- /dev/null +++ b/bitwiseARM.s @@ -0,0 +1,368 @@ +; Tremolo library +; Copyright (C) 2009 Robin Watts for Pinknoise Productions Ltd + + AREA |.text|, CODE, READONLY + + EXPORT oggpack_look + EXPORT oggpack_adv + EXPORT oggpack_readinit + EXPORT oggpack_read + +oggpack_look + ; r0 = oggpack_buffer *b + ; r1 = int bits + STMFD r13!,{r10,r11,r14} + LDMIA r0,{r2,r3,r12} + ; r2 = bitsLeftInSegment + ; r3 = ptr + ; r12= bitsLeftInWord + SUBS r2,r2,r1 ; bitsLeftinSegment -= bits + BLT look_slow ; Not enough bits in this segment for + ; this request. Do it slowly. + LDR r10,[r3] ; r10= ptr[0] + RSB r14,r12,#32 ; r14= 32-bitsLeftInWord + SUBS r12,r12,r1 ; r12= bitsLeftInWord -= bits + LDRLT r11,[r3,#4]! ; r11= ptr[1] + MOV r10,r10,LSR r14 ; r10= ptr[0]>>(32-bitsLeftInWord) + ADDLE r12,r12,#32 ; r12= bitsLeftInWord += 32 + RSB r14,r14,#32 ; r14= 32-bitsLeftInWord + ORRLT r10,r10,r11,LSL r14 ; r10= Next 32 bits. + MOV r14,#1 + RSB r14,r14,r14,LSL r1 + AND r0,r10,r14 + LDMFD r13!,{r10,r11,PC} + +look_slow + STMFD r13!,{r5,r6} + ADDS r10,r2,r1 ; r10= bitsLeftInSegment + bits (i.e. + ; the initial value of bitsLeftInSeg) + ; r10 = bitsLeftInSegment (initial) + ; r12 = bitsLeftInWord + RSB r14,r12,#32 ; r14= 32-bitsLeftInWord + MOV r5,r10 ; r5 = bitsLeftInSegment (initial) + BLT look_overrun + BEQ look_next_segment ; r10= r12 = 0, if we branch + CMP r12,r10 ; If bitsLeftInWord < bitsLeftInSeg + ; there must be more in the next word + LDR r10,[r3],#4 ; r10= ptr[0] + LDRLT r6,[r3] ; r6 = ptr[1] + MOV r11,#1 + MOV r10,r10,LSR r14 ; r10= first bitsLeftInWord bits + ORRLT r10,r10,r6,LSL r12 ; r10= first bitsLeftInSeg bits+crap + RSB r11,r11,r11,LSL r5 ; r11= mask + AND r10,r10,r11 ; r10= first r5 bits + ; Load the next segments data +look_next_segment + ; At this point, r10 contains the first r5 bits of the result + LDR r11,[r0,#12] ; r11= head = b->head + ; Stall + ; Stall +look_next_segment_2 + LDR r11,[r11,#12] ; r11= head = head->next + ; Stall + ; Stall + CMP r11,#0 + BEQ look_out_of_data + LDMIA r11,{r6,r12,r14} ; r6 = buffer + ; r12= begin + ; r14= length + LDR r6,[r6] ; r6 = buffer->data + CMP r14,#0 + BEQ look_next_segment_2 + ADD r6,r6,r12 ; r6 = buffer->data+begin +look_slow_loop + LDRB r12,[r6],#1 ; r12= *buffer + SUBS r14,r14,#1 ; r14= length + ; Stall + ORR r10,r10,r12,LSL r5 ; r10= first r5+8 bits + ADD r5,r5,#8 + BLE look_really_slow + CMP r5,r1 + BLT look_slow_loop + MOV r14,#1 + RSB r14,r14,r14,LSL r1 + AND r0,r10,r14 + LDMFD r13!,{r5,r6,r10,r11,PC} + + +look_really_slow + CMP r5,r1 + BLT look_next_segment_2 + MOV r14,#1 + RSB r14,r14,r14,LSL r1 + AND r0,r10,r14 + LDMFD r13!,{r5,r6,r10,r11,PC} + +look_out_of_data + ;MVN r0,#0 ; return -1 + MOV r0,#0 + LDMFD r13!,{r5,r6,r10,r11,PC} + +look_overrun + ; We had overrun when we started, so we need to skip -r10 bits. + LDR r11,[r0,#12] ; r11 = head = b->head + ; stall + ; stall +look_overrun_next_segment + LDR r11,[r11,#12] ; r11 = head->next + ; stall + ; stall + CMP r11,#0 + BEQ look_out_of_data + LDMIA r11,{r6,r7,r14} ; r6 = buffer + ; r7 = begin + ; r14= length + LDR r6,[r6] ; r6 = buffer->data + ; stall + ; stall + ADD r6,r6,r7 ; r6 = buffer->data+begin + MOV r14,r14,LSL #3 ; r14= length in bits + ADDS r14,r14,r10 ; r14= length in bits-bits to skip + MOVLE r10,r14 + BLE look_overrun_next_segment + RSB r10,r10,#0 ; r10= bits to skip + ADD r6,r10,r10,LSR #3 ; r6 = pointer to data + MOV r10,#0 + B look_slow_loop + +oggpack_adv + ; r0 = oggpack_buffer *b + ; r1 = bits + LDMIA r0,{r2,r3,r12} + ; r2 = bitsLeftInSegment + ; r3 = ptr + ; r12= bitsLeftInWord + SUBS r2,r2,r1 ; Does this run us out of bits in the + BLE adv_slow ; segment? If so, do it slowly + SUBS r12,r12,r1 + ADDLE r12,r12,#32 + ADDLE r3,r3,#4 + STMIA r0,{r2,r3,r12} + MOV PC,R14 +adv_slow + STMFD r13!,{r10,r14} + + LDR r14,[r0,#12] ; r14= head + ; stall +adv_slow_loop + LDR r1,[r0,#20] ; r1 = count + LDR r10,[r14,#8] ; r10= head->length + LDR r14,[r14,#12] ; r14= head->next + ; stall + ADD r1,r1,r10 ; r1 = count += head->length + CMP r14,#0 + BEQ adv_end + STR r1,[r0,#20] ; b->count = count + STR r14,[r0,#12] ; b->head = head + LDMIA r14,{r3,r10,r12} ; r3 = buffer + ; r10= begin + ; r12= length + LDR r3,[r3] ; r3 = buffer->data + ADD r3,r3,r10 ; r3 = Pointer to start (byte) + AND r10,r3,#3 ; r10= bytes to backtrk to word align + MOV r10,r10,LSL #3 ; r10= bits to backtrk to word align + RSB r10,r10,#32 ; r10= bits left in word + ADDS r10,r10,r2 ; r10= bits left in word after skip + ADDLE r10,r10,#32 + ADDLE r3,r3,#4 + BIC r3,r3,#3 ; r3 = Pointer to start (word) + ADDS r2,r2,r12,LSL #3 ; r2 = length in bits after advance + BLE adv_slow_loop + STMIA r0,{r2,r3,r10} + + LDMFD r13!,{r10,PC} +adv_end + MOV r2, #0 + MOV r12,#0 + STMIA r0,{r2,r3,r12} + + LDMFD r13!,{r10,PC} + +oggpack_readinit + ; r0 = oggpack_buffer *b + ; r1 = oggreference *r + STR r1,[r0,#12] ; b->head = r1 + STR r1,[r0,#16] ; b->tail = r1 + LDMIA r1,{r2,r3,r12} ; r2 = b->head->buffer + ; r3 = b->head->begin + ; r12= b->head->length + LDR r2,[r2] ; r2 = b->head->buffer->data + MOV r1,r12,LSL #3 ; r1 = BitsInSegment + MOV r12,#0 + ADD r3,r2,r3 ; r3 = r2+b->head->begin + BIC r2,r3,#3 ; r2 = b->headptr (word) + AND r3,r3,#3 + MOV r3,r3,LSL #3 + RSB r3,r3,#32 ; r3 = BitsInWord + STMIA r0,{r1,r2,r3} + STR r12,[r0,#20] + MOV PC,R14 + +oggpack_read + ; r0 = oggpack_buffer *b + ; r1 = int bits + STMFD r13!,{r10,r11,r14} + LDMIA r0,{r2,r3,r12} + ; r2 = bitsLeftInSegment + ; r3 = ptr + ; r12= bitsLeftInWord + SUBS r2,r2,r1 ; bitsLeftinSegment -= bits + BLT read_slow ; Not enough bits in this segment for + ; this request. Do it slowly. + LDR r10,[r3] ; r10= ptr[0] + RSB r14,r12,#32 ; r14= 32-bitsLeftInWord + SUBS r12,r12,r1 ; r12= bitsLeftInWord -= bits + ADDLE r3,r3,#4 + LDRLT r11,[r3] ; r11= ptr[1] + MOV r10,r10,LSR r14 ; r10= ptr[0]>>(32-bitsLeftInWord) + ADDLE r12,r12,#32 ; r12= bitsLeftInWord += 32 + RSB r14,r14,#32 ; r14= 32-bitsLeftInWord + ORRLT r10,r10,r11,LSL r14 ; r10= Next 32 bits. + STMIA r0,{r2,r3,r12} + MOV r14,#1 + RSB r14,r14,r14,LSL r1 + AND r0,r10,r14 + LDMFD r13!,{r10,r11,PC} + +read_slow + STMFD r13!,{r5,r6} + ADDS r10,r2,r1 ; r10= bitsLeftInSegment + bits (i.e. + ; the initial value of bitsLeftInSeg) + ; r10 = bitsLeftInSegment (initial) + ; r12 = bitsLeftInWord + RSB r14,r12,#32 ; r14= 32-bitsLeftInWord + MOV r5,r10 ; r5 = bitsLeftInSegment (initial) + BLT read_overrun + BEQ read_next_segment ; r10= r12 = 0, if we branch + CMP r12,r10 ; If bitsLeftInWord < bitsLeftInSeg + ; there must be more in the next word + LDR r10,[r3],#4 ; r10= ptr[0] + LDRLT r6,[r3] ; r6 = ptr[1] + MOV r11,#1 + MOV r10,r10,LSR r14 ; r10= first bitsLeftInWord bits + ORRLT r10,r10,r6,LSL r12 ; r10= first bitsLeftInSeg bits+crap + RSB r11,r11,r11,LSL r5 ; r11= mask + AND r10,r10,r11 ; r10= first r5 bits + ; Load the next segments data +read_next_segment + ; At this point, r10 contains the first r5 bits of the result + LDR r11,[r0,#12] ; r11= head = b->head + ; Stall +read_next_segment_2 + ; r11 = head + LDR r6,[r0,#20] ; r6 = count + LDR r12,[r11,#8] ; r12= length + LDR r11,[r11,#12] ; r11= head = head->next + ; Stall + ADD r6,r6,r12 ; count += length + CMP r11,#0 + BEQ read_out_of_data + STR r11,[r0,#12] + STR r6,[r0,#20] ; b->count = count + LDMIA r11,{r6,r12,r14} ; r6 = buffer + ; r12= begin + ; r14= length + LDR r6,[r6] ; r6 = buffer->data + CMP r14,#0 + BEQ read_next_segment_2 + ADD r6,r6,r12 ; r6 = buffer->data+begin +read_slow_loop + LDRB r12,[r6],#1 ; r12= *buffer + SUBS r14,r14,#1 ; r14= length + ; Stall + ORR r10,r10,r12,LSL r5 ; r10= first r5+8 bits + ADD r5,r5,#8 + BLE read_really_slow + CMP r5,r1 + BLT read_slow_loop +read_end + MOV r12,#1 + RSB r12,r12,r12,LSL r1 + + ; Store back the new position + ; r2 = -number of bits to go from this segment + ; r6 = ptr + ; r14= bytesLeftInSegment + ; r11= New head value + LDMIA r11,{r3,r6,r14} ; r3 = buffer + ; r6 = begin + ; r14= length + LDR r3,[r3] ; r3 = buffer->data + ADD r1,r2,r14,LSL #3 ; r1 = bitsLeftInSegment + ; stall + ADD r6,r3,r6 ; r6 = pointer + AND r3,r6,#3 ; r3 = bytes used in first word + RSB r3,r2,r3,LSL #3 ; r3 = bits used in first word + BIC r2,r6,#3 ; r2 = word ptr + RSBS r3,r3,#32 ; r3 = bitsLeftInWord + ADDLE r3,r3,#32 + ADDLE r2,r2,#4 + STMIA r0,{r1,r2,r3} + + AND r0,r10,r12 + LDMFD r13!,{r5,r6,r10,r11,PC} + + +read_really_slow + CMP r5,r1 + BGE read_end + LDR r14,[r11,#8] ; r14= length of segment just done + ; stall + ; stall + ADD r2,r2,r14,LSL #3 ; r2 = -bits to use from next seg + B read_next_segment_2 + +read_out_of_data + ; Store back the new position + ; r2 = -number of bits to go from this segment + ; r6 = ptr + ; r14= bytesLeftInSegment + ; RJW: This may be overkill - we leave the buffer empty, with -1 + ; bits left in it. We might get away with just storing the + ; bitsLeftInSegment as -1. + LDR r11,[r0,#12] ; r11=head + + LDMIA r11,{r3,r6,r14} ; r3 = buffer + ; r6 = begin + ; r14= length + LDR r3,[r3] ; r3 = buffer->data + ADD r6,r3,r6 ; r6 = pointer + ADD r6,r6,r14 + AND r3,r6,#3 ; r3 = bytes used in first word + MOV r3,r3,LSL #3 ; r3 = bits used in first word + BIC r2,r6,#3 ; r2 = word ptr + RSBS r3,r3,#32 ; r3 = bitsLeftInWord + MVN r1,#0 ; r1 = -1 = bitsLeftInSegment + STMIA r0,{r1,r2,r3} + ;MVN r0,#0 ; return -1 + MOV r0,#0 + LDMFD r13!,{r5,r6,r10,r11,PC} + +read_overrun + ; We had overrun when we started, so we need to skip -r10 bits. + LDR r11,[r0,#12] ; r11 = head = b->head + ; stall + ; stall +read_overrun_next_segment + LDR r11,[r11,#12] ; r11 = head->next + ; stall + ; stall + CMP r11,#0 + BEQ read_out_of_data + LDMIA r11,{r6,r7,r14} ; r6 = buffer + ; r7 = begin + ; r14= length + LDR r6,[r6] ; r6 = buffer->data + ; stall + ; stall + ADD r6,r6,r7 ; r6 = buffer->data+begin + MOV r14,r14,LSL #3 ; r14= length in bits + ADDS r14,r14,r10 ; r14= length in bits-bits to skip + MOVLE r10,r14 + BLE read_overrun_next_segment + RSB r10,r10,#0 ; r10= bits to skip + ADD r6,r10,r10,LSR #3 ; r6 = pointer to data + MOV r10,#0 + B read_slow_loop + + END @@ -50,10 +50,15 @@ typedef struct ogg_reference { } ogg_reference; typedef struct oggpack_buffer { +#ifdef _ARM_ASSEM_ + int bitsLeftInSegment; + ogg_uint32_t *ptr; + long bitsLeftInWord; +#else int headbit; unsigned char *headptr; long headend; - +#endif /* memory management */ ogg_reference *head; ogg_reference *tail; |