summaryrefslogtreecommitdiff
path: root/webrtc/common_audio/signal_processing/spl_sqrt_floor_arm.S
blob: 72cd2d9a0a0183be128a9d2355fac6d441c4c479 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
@
@ Written by Wilco Dijkstra, 1996. The following email exchange establishes the
@ license.
@
@ From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com>
@ Date: Fri, Jun 24, 2011 at 3:20 AM
@ Subject: Re: sqrt routine
@ To: Kevin Ma <kma@google.com>
@ Hi Kevin,
@ Thanks for asking. Those routines are public domain (originally posted to
@ comp.sys.arm a long time ago), so you can use them freely for any purpose.
@ Cheers,
@ Wilco
@
@ ----- Original Message -----
@ From: "Kevin Ma" <kma@google.com>
@ To: <Wilco.Dijkstra@ntlworld.com>
@ Sent: Thursday, June 23, 2011 11:44 PM
@ Subject: Fwd: sqrt routine
@ Hi Wilco,
@ I saw your sqrt routine from several web sites, including
@ http://www.finesse.demon.co.uk/steven/sqrt.html.
@ Just wonder if there's any copyright information with your Successive
@ approximation routines, or if I can freely use it for any purpose.
@ Thanks.
@ Kevin

@ Minor modifications in code style for WebRTC, 2012.
@ Output is bit-exact with the reference C code in spl_sqrt_floor.c.

@ Input :             r0 = 32-bit unsigned integer
@ Output:             r0 = INT (SQRT (r0)), i.e. the square root rounded down;
@                     the result has 16 significant bits
@ Registers touched:  r1, r2 (the condition flags are modified as well)

#include "webrtc/system_wrappers/include/asm_defines.h"

@ WebRtcSpl_SqrtFloor
@
@ Integer square root by successive approximation, fully unrolled into 16
@ steps (one step per result bit, most significant bit first).
@
@ Register roles inside the routine:
@   r0  on entry the 32-bit unsigned argument; during the steps the running
@       remainder; on exit the result.
@   r1  the constant 3 << 30, added in every step so that the top two bits
@       of r2 return to 01 after r2 has been shifted left.
@   r2  starts as 1 << 30; gathers one result bit per step in its low bits
@       while its top two bits act as a marker.
@
@ The two macros used to export and define the symbol come from
@ asm_defines.h and are not visible in this file.
GLOBAL_FUNCTION WebRtcSpl_SqrtFloor
.align  2
DEFINE_FUNCTION WebRtcSpl_SqrtFloor
  mov    r1, #3 << 30
  mov    r2, #1 << 30

  @ unroll for i = 0 .. 15
  @
  @ Every step is the same three instructions, only the rotate amount
  @ (2 * i) changes:
  @   cmp    compares the remainder r0 with the trial value, which is r2
  @          rotated right by 2 * i; the carry flag ends up set when
  @          r0 >= trial value (unsigned).
  @   subhs  subtracts the trial value from the remainder only in that case.
  @          It has no S suffix, so the carry from cmp is left intact.
  @   adc    computes r2 = r1 + (r2 << 1) + carry: the bits gathered so far
  @          move up by one, the carry becomes the new lowest bit, and adding
  @          r1 puts the marker back into the top two bits.
  @ The order of these three instructions must not be changed, because adc
  @ consumes the carry produced by cmp.

  cmp    r0, r2, ror #2 * 0
  subhs  r0, r0, r2, ror #2 * 0
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 1
  subhs  r0, r0, r2, ror #2 * 1
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 2
  subhs  r0, r0, r2, ror #2 * 2
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 3
  subhs  r0, r0, r2, ror #2 * 3
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 4
  subhs  r0, r0, r2, ror #2 * 4
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 5
  subhs  r0, r0, r2, ror #2 * 5
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 6
  subhs  r0, r0, r2, ror #2 * 6
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 7
  subhs  r0, r0, r2, ror #2 * 7
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 8
  subhs  r0, r0, r2, ror #2 * 8
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 9
  subhs  r0, r0, r2, ror #2 * 9
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 10
  subhs  r0, r0, r2, ror #2 * 10
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 11
  subhs  r0, r0, r2, ror #2 * 11
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 12
  subhs  r0, r0, r2, ror #2 * 12
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 13
  subhs  r0, r0, r2, ror #2 * 13
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 14
  subhs  r0, r0, r2, ror #2 * 14
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 15
  subhs  r0, r0, r2, ror #2 * 15
  adc    r2, r1, r2, lsl #1

  @ Clear the marker in bits 31 and 30 of r2 and place the remaining bits,
  @ the 16 gathered result bits, in r0. The leftover remainder that was in
  @ r0 is overwritten here.
  bic    r0, r2, #3 << 30  @ for rounding add: cmp r0, r2  adc r2, #1
  @ Return to the caller with the result in r0.
  bx lr