packages/libndsfpc/src/nds/arm9/math.inc


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233

{$ifdef NDS_INTERFACE}
//  Math coprocessor register definitions
//  Typed pointer constants holding the fixed addresses of the divider and
//  square-root registers. A name ending in _L points at the same address as
//  the 64-bit register it belongs to, and the matching _H name points 4 bytes
//  further, so each pair gives 32-bit access to the two halves.
const
  // Divider control register; written with a DIV_* mode, polled for DIV_BUSY.
  REG_DIVCNT          : pcuint16 = pointer($04000280);
  // Divider numerator (64-bit, plus 32-bit halves).
  REG_DIV_NUMER       : pcint64 = pointer($04000290);
  REG_DIV_NUMER_L     : pcint32 = pointer($04000290);
  REG_DIV_NUMER_H     : pcint32 = pointer($04000294);
  // Divider denominator (64-bit, plus 32-bit halves).
  REG_DIV_DENOM       : pcint64 = pointer($04000298);
  REG_DIV_DENOM_L     : pcint32 = pointer($04000298);
  REG_DIV_DENOM_H     : pcint32 = pointer($0400029C);
  // Division result as read by div32/div64/divf32.
  REG_DIV_RESULT      : pcint64 = pointer($040002A0);
  REG_DIV_RESULT_L    : pcint32 = pointer($040002A0);
  REG_DIV_RESULT_H    : pcint32 = pointer($040002A4);
  // Second division result as read by mod32/mod64 (presumably the remainder).
  REG_DIVREM_RESULT   : pcint64 = pointer($040002A8);
  REG_DIVREM_RESULT_L : pcint32 = pointer($040002A8);
  REG_DIVREM_RESULT_H : pcint32 = pointer($040002AC);
  // Square-root control register; written with a SQRT_* mode, polled for SQRT_BUSY.
  REG_SQRTCNT         : pcuint16 = pointer($040002B0);
  // Square-root operand (64-bit, plus 32-bit halves).
  REG_SQRT_PARAM      : pcint64 = pointer($040002B8);
  REG_SQRT_PARAM_L    : pcint32 = pointer($040002B8);
  REG_SQRT_PARAM_H    : pcint32 = pointer($040002BC);
  // Square-root result (32-bit).
  REG_SQRT_RESULT     : pcuint32 = pointer($040002B4);

//  Math coprocessor modes

  // Values written to REG_DIVCNT to select the operand widths.
  // DIV_64_64 is not used by the routines in this file; judging by its name
  // it selects a 64-bit numerator and 64-bit denominator -- TODO confirm.
  DIV_64_64		= 2;
  // Used by divf32, div64 and mod64 (64-bit numerator, 32-bit denominator).
  DIV_64_32		= 1;
  // Used by div32 and mod32 (32-bit numerator, 32-bit denominator).
  DIV_32_32		= 0;
  // Bit 15 of REG_DIVCNT; the routines spin while it is set.
  DIV_BUSY		= (1 shl 15);

  // Values written to REG_SQRTCNT to select the operand width.
  // Used by sqrtf32 and sqrt64 (64-bit operand).
  SQRT_64			= 1;
  // Used by sqrt32 (32-bit operand).
  SQRT_32			= 0;
  // Bit 15 of REG_SQRTCNT; the routines spin while it is set.
  SQRT_BUSY		= (1 shl 15);

//  Conversions between integers / floats and fixed point values with
//  12 fractional bits.
function inttof32(n: cint): cint32; inline;
function f32toint(n: cint32): cint; inline;
function floattof32(n: cfloat): cint32; inline;  // NOTE: because it is inlined, this function cannot be passed to another function
function f32tofloat(n: cint32): cfloat; inline;
  
//  Fixed point arithmetic (12 fractional bits).
function divf32(num: cint32; den: cint32): cint32; inline;
function mulf32(a, b: cint32): cint32; inline;
function sqrtf32(a: cint32): cint32; inline;
//  Integer division, remainder and square root using the math coprocessor
//  registers.
function div32(num, den: cint32): cint32; inline;
function mod32(num, den: cint32): cint32; inline;
function div64(num: cint64; den: cint32): cint32; inline;
function mod64(num: cint64; den: cint32): cint32; inline;
function sqrt32(a: cint32): cuint32; inline;
function sqrt64(a: cint64): cuint32; inline;
//  Operations on 3-component fixed point vectors; each pointer is indexed
//  at [0], [1] and [2] by the implementations.
procedure crossf32(a: pcint32; b: pcint32; res: pcint32); inline;
function dotf32(a, b: pcint32): cint32; inline;
procedure normalizef32(a: pcint32); inline;  
{$endif NDS_INTERFACE}

{$ifdef NDS_IMPLEMENTATION}
//  Converts an integer to a fixed point value with 12 fractional bits
//  by shifting it left by 12.
function inttof32(n: cint): cint32; inline;
const
  FRAC_BITS = 12;  // number of fractional bits in the fixed point format
begin
  inttof32 := n shl FRAC_BITS;
end;

//  Converts a fixed point value with 12 fractional bits to an integer,
//  rounding towards negative infinity (the same result as the arithmetic
//  shift `n >> 12` in the C version of this library).
//
//  Pascal's `shr` is a logical shift: applied directly to a negative value
//  it shifts zeros into the top bits and yields a large positive number.
//  Negative inputs are therefore complemented before and after the shift,
//  which is equivalent to an arithmetic shift right.
function f32toint(n: cint32): cint; inline;
begin
  if n < 0 then
    // not n is non-negative here, so the logical shift is safe.
    f32toint := not ((not n) shr 12)
  else
    f32toint := n shr 12;
end;

//  Converts a floating point value to fixed point with 12 fractional bits.
//  The value is scaled by 2^12 and the fractional remainder is truncated.
function floattof32(n: cfloat): cint32; inline;
const
  F32_ONE = 1 shl 12;  // fixed point representation of 1.0
begin
  floattof32 := trunc(n * F32_ONE);
end;

//  Converts a fixed point value with 12 fractional bits to floating point
//  by dividing it by 2^12.
//  NOTE(review): the original author marked this routine "check it!".
function f32tofloat(n: cint32): cfloat; inline;
const
  F32_ONE = 1 shl 12;  // fixed point representation of 1.0
begin
  f32tofloat := cfloat(n * 1.0) / cfloat(F32_ONE * 1.0);
end;

//  Fixed point divide
//  Takes two fixed point values with 12 fractional bits and returns the
//  first 32-bit word of the divider result.
function divf32(num: cint32; den: cint32): cint32; inline;
begin
  // Select the 64-bit numerator / 32-bit denominator mode.
  REG_DIVCNT^ := DIV_64_32;

  // Spin until the busy bit is clear before loading the operands.
  repeat until (REG_DIVCNT^ and DIV_BUSY) = 0;

  // The numerator is widened and scaled by 2^12 so that the quotient
  // keeps 12 fractional bits.
  REG_DIV_NUMER^ := cint64(num) shl 12;
  REG_DIV_DENOM_L^ := den;

  // Spin until the busy bit is clear again, then fetch the result.
  repeat until (REG_DIVCNT^ and DIV_BUSY) = 0;

  divf32 := REG_DIV_RESULT_L^;
end;

//  Fixed point multiply
//  Multiplies two fixed point values with 12 fractional bits. The product
//  is formed in 64 bits, shifted right by 12 and truncated to 32 bits.
//  The shift is logical, but the bits it fills in lie above bit 31 of the
//  shifted value and are discarded by the cast to cint32.
function mulf32(a, b: cint32): cint32; inline;
begin
  mulf32 := cint32((clonglong(a) * clonglong(b)) shr 12);
end;

//  Fixed point square root
//  Takes a 1.19.12 fixed point value and returns the fixed point result.
function sqrtf32(a: cint32): cint32; inline;
begin
  // Select the 64-bit operand mode.
  REG_SQRTCNT^ := SQRT_64;

  // Spin until the busy bit is clear before loading the operand.
  repeat until (REG_SQRTCNT^ and SQRT_BUSY) = 0;

  // Scaling the operand by 2^12 makes the root come out with
  // 12 fractional bits: sqrt(x * 2^12 * 2^12) = sqrt(x) * 2^12.
  REG_SQRT_PARAM^ := cint64(a) shl 12;

  // Spin until the busy bit is clear again, then fetch the result.
  repeat until (REG_SQRTCNT^ and SQRT_BUSY) = 0;

  sqrtf32 := REG_SQRT_RESULT^;
end;

//  Integer versions

//  Integer divide
//  Takes a 32 bit numerator and a 32 bit denominator and returns
//  the 32 bit quotient.
function div32(num, den: cint32): cint32; inline;
begin
  // Select the 32-bit / 32-bit mode.
  REG_DIVCNT^ := DIV_32_32;

  // Spin until the busy bit is clear before loading the operands.
  repeat until (REG_DIVCNT^ and DIV_BUSY) = 0;

  REG_DIV_NUMER_L^ := num;
  REG_DIV_DENOM_L^ := den;

  // Spin until the busy bit is clear again, then fetch the quotient.
  repeat until (REG_DIVCNT^ and DIV_BUSY) = 0;

  div32 := REG_DIV_RESULT_L^;
end;

//  Integer modulo
//  Takes a 32 bit numerator and a 32 bit denominator, runs the same
//  division as div32 and returns the 32 bit value read from the
//  REG_DIVREM_RESULT register instead of the quotient.
function mod32(num, den: cint32): cint32; inline;
begin
  // Select the 32-bit / 32-bit mode.
  REG_DIVCNT^ := DIV_32_32;

  // Spin until the busy bit is clear before loading the operands.
  repeat until (REG_DIVCNT^ and DIV_BUSY) = 0;

  REG_DIV_NUMER_L^ := num;
  REG_DIV_DENOM_L^ := den;

  // Spin until the busy bit is clear again, then fetch the remainder register.
  repeat until (REG_DIVCNT^ and DIV_BUSY) = 0;

  mod32 := REG_DIVREM_RESULT_L^;
end;

//  Integer divide
//  Takes a 64 bit numerator and a 32 bit denominator and returns
//  the first 32-bit word of the quotient.
function div64(num: cint64; den: cint32): cint32; inline;
begin
  // Select the 64-bit numerator / 32-bit denominator mode.
  REG_DIVCNT^ := DIV_64_32;

  // Spin until the busy bit is clear before loading the operands.
  repeat until (REG_DIVCNT^ and DIV_BUSY) = 0;

  REG_DIV_NUMER^ := num;
  REG_DIV_DENOM_L^ := den;

  // Spin until the busy bit is clear again, then fetch the quotient.
  repeat until (REG_DIVCNT^ and DIV_BUSY) = 0;

  div64 := REG_DIV_RESULT_L^;
end;

//  Integer modulo
//  Takes a 64 bit numerator and a 32 bit denominator, runs the same
//  division as div64 and returns the first 32-bit word read from the
//  REG_DIVREM_RESULT register instead of the quotient.
function mod64(num: cint64; den: cint32): cint32; inline;
begin
  // Select the 64-bit numerator / 32-bit denominator mode.
  REG_DIVCNT^ := DIV_64_32;

  // Spin until the busy bit is clear before loading the operands.
  repeat until (REG_DIVCNT^ and DIV_BUSY) = 0;

  REG_DIV_NUMER^ := num;
  REG_DIV_DENOM_L^ := den;

  // Spin until the busy bit is clear again, then fetch the remainder register.
  repeat until (REG_DIVCNT^ and DIV_BUSY) = 0;

  mod64 := REG_DIVREM_RESULT_L^;
end;

//  Integer square root
//  Takes a 32 bit integer and returns the 32 bit result.
function sqrt32(a: cint32): cuint32; inline;
begin
  // Select the 32-bit operand mode.
  REG_SQRTCNT^ := SQRT_32;

  // Spin until the busy bit is clear before loading the operand.
  repeat until (REG_SQRTCNT^ and SQRT_BUSY) = 0;

  REG_SQRT_PARAM_L^ := a;

  // Spin until the busy bit is clear again, then fetch the result.
  repeat until (REG_SQRTCNT^ and SQRT_BUSY) = 0;

  sqrt32 := REG_SQRT_RESULT^;
end;

//  Integer square root
//  Takes a 64 bit integer and returns the 32 bit result.
function sqrt64(a: cint64): cuint32; inline;
begin
  // Select the 64-bit operand mode.
  REG_SQRTCNT^ := SQRT_64;

  // Spin until the busy bit is clear before loading the operand.
  repeat until (REG_SQRTCNT^ and SQRT_BUSY) = 0;

  REG_SQRT_PARAM^ := a;

  // Spin until the busy bit is clear again, then fetch the result.
  repeat until (REG_SQRTCNT^ and SQRT_BUSY) = 0;

  sqrt64 := REG_SQRT_RESULT^;
end;

//  Fixed point cross product
//  Computes res := a x b for two vectors of three fixed point components
//  (12 fractional bits), read from a[0..2] and b[0..2] and written to
//  res[0..2].
//
//  All three components are computed before anything is stored, so the
//  result is correct even when res points at the same memory as a or b.
procedure crossf32(a: pcint32; b: pcint32; res: pcint32); inline;
var
  cx, cy, cz: cint32;
begin
  cx := mulf32(a[1], b[2]) - mulf32(b[1], a[2]);
  cy := mulf32(a[2], b[0]) - mulf32(b[2], a[0]);
  cz := mulf32(a[0], b[1]) - mulf32(b[0], a[1]);

  // Store only after every input component has been read.
  res[0] := cx;
  res[1] := cy;
  res[2] := cz;
end;

//  Fixed point dot product
//  Returns a[0]*b[0] + a[1]*b[1] + a[2]*b[2], where every product is a
//  fixed point multiplication performed by mulf32.
function dotf32(a, b: pcint32): cint32; inline;
var
  acc: cint32;
begin
  acc := mulf32(a[0], b[0]);
  acc := acc + mulf32(a[1], b[1]);
  acc := acc + mulf32(a[2], b[2]);
  dotf32 := acc;
end;

//  Fixed point vector normalize
//  Divides each of the three components a[0..2] in place by the vector's
//  magnitude, sqrt(a[0]^2 + a[1]^2 + a[2]^2), computed in fixed point.
//  No check is made for a zero magnitude.
procedure normalizef32(a: pcint32); inline;
var
  lengthSquared, length: cint32;
  i: cint32;
begin
  lengthSquared := mulf32(a[0], a[0]) + mulf32(a[1], a[1]) + mulf32(a[2], a[2]);
  length := sqrtf32(lengthSquared);

  for i := 0 to 2 do
    a[i] := divf32(a[i], length);
end;
{$endif NDS_IMPLEMENTATION}