diff options
author | Monty <xiphmont@xiph.org> | 2000-10-19 10:21:02 +0000 |
---|---|---|
committer | Monty <xiphmont@xiph.org> | 2000-10-19 10:21:02 +0000 |
commit | b074c6e416dcabef56ebbcce6ae4e5e1c12af088 (patch) | |
tree | 592cf3d976c66bd003ca9805d2d5940d0f572b4c | |
parent | a454e26f286b9b9503b10d092110e326a78cebc4 (diff) | |
download | libvorbis-git-b074c6e416dcabef56ebbcce6ae4e5e1c12af088.tar.gz |
cast/rounding asm for vorbisfile float->int and lsp lookups
svn path=/branches/branch_beta3/vorbis/; revision=738
-rw-r--r-- | lib/lookup.c | 94 | ||||
-rw-r--r-- | lib/lsp.c | 369 | ||||
-rw-r--r-- | lib/os.h | 115 | ||||
-rw-r--r-- | lib/vorbisfile.c | 29 |
4 files changed, 601 insertions, 6 deletions
diff --git a/lib/lookup.c b/lib/lookup.c new file mode 100644 index 00000000..5589a1ed --- /dev/null +++ b/lib/lookup.c @@ -0,0 +1,94 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY * + * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE. * + * PLEASE READ THESE TERMS DISTRIBUTING. * + * * + * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-2000 * + * by Monty <monty@xiph.org> and The XIPHOPHORUS Company * + * http://www.xiph.org/ * + * * + ******************************************************************** + + function: lookup based functions + last mod: $Id: lookup.c,v 1.2.2.1 2000/10/19 10:21:02 xiphmont Exp $ + + ********************************************************************/ + +#include <math.h> +#include "lookup.h" +#include "lookup_data.h" +#include "os.h" + +#ifdef FLOAT_LOOKUP + +/* interpolated lookup based cos function, domain 0 to PI only */ +float vorbis_coslook(float a){ + double d=a*(.31830989*(float)COS_LOOKUP_SZ); + int i=vorbis_ftoi(d-.5); + + return COS_LOOKUP[i]+ (d-i)*(COS_LOOKUP[i+1]-COS_LOOKUP[i]); +} + +/* interpolated 1./sqrt(p) where .5 <= p < 1. */ +float vorbis_invsqlook(float a){ + double d=a*(2.*(float)INVSQ_LOOKUP_SZ)-(float)INVSQ_LOOKUP_SZ; + int i=vorbis_ftoi(d-.5); + return INVSQ_LOOKUP[i]+ (d-i)*(INVSQ_LOOKUP[i+1]-INVSQ_LOOKUP[i]); +} + +/* interpolated 1./sqrt(p) where .5 <= p < 1. */ +float vorbis_invsq2explook(int a){ + return INVSQ2EXP_LOOKUP[a-INVSQ2EXP_LOOKUP_MIN]; +} + +#include <stdio.h> +/* interpolated lookup based fromdB function, domain -140dB to 0dB only */ +float vorbis_fromdBlook(float a){ + int i=vorbis_ftoi(a*((float)(-(1<<FROMdB2_SHIFT)))-5.); + return (i<0)?1.: + ((i>=(FROMdB_LOOKUP_SZ<<FROMdB_SHIFT))?0.: + FROMdB_LOOKUP[i>>FROMdB_SHIFT]*FROMdB2_LOOKUP[i&FROMdB2_MASK]); +} + +#endif + +#ifdef INT_LOOKUP +/* interpolated 1./sqrt(p) where .5 <= a < 1. (.100000... to .111111...) in + 16.16 format + + returns in m.8 format */ +long vorbis_invsqlook_i(long a,long e){ + long i=(a&0x7fff)>>(INVSQ_LOOKUP_I_SHIFT-1); + long d=(a&INVSQ_LOOKUP_I_MASK)<<(16-INVSQ_LOOKUP_I_SHIFT); /* 0.16 */ + long val=INVSQ_LOOKUP_I[i]- /* 1.16 */ + (((INVSQ_LOOKUP_I[i]-INVSQ_LOOKUP_I[i+1])* /* 0.16 */ + d)>>16); /* result 1.16 */ + + e+=32; + if(e&1)val=(val*5792)>>13; /* multiply val by 1/sqrt(2) */ + e=(e>>1)-8; + + return(val>>e); +} + +/* interpolated lookup based fromdB function, domain -140dB to 0dB only */ +/* a is in n.12 format */ +float vorbis_fromdBlook_i(long a){ + int i=(-a)>>(12-FROMdB2_SHIFT); + return (i<0)?1.: + ((i>=(FROMdB_LOOKUP_SZ<<FROMdB_SHIFT))?0.: + FROMdB_LOOKUP[i>>FROMdB_SHIFT]*FROMdB2_LOOKUP[i&FROMdB2_MASK]); +} + +/* interpolated lookup based cos function, domain 0 to PI only */ +/* a is in 0.16 format, where 0==0, 2^^16-1==PI, return 0.14 */ +long vorbis_coslook_i(long a){ + int i=a>>COS_LOOKUP_I_SHIFT; + int d=a&COS_LOOKUP_I_MASK; + return COS_LOOKUP_I[i]- ((d*(COS_LOOKUP_I[i]-COS_LOOKUP_I[i+1]))>> + COS_LOOKUP_I_SHIFT); +} + +#endif diff --git a/lib/lsp.c b/lib/lsp.c new file mode 100644 index 00000000..d78be58e --- /dev/null +++ b/lib/lsp.c @@ -0,0 +1,369 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY * + * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE. * + * PLEASE READ THESE TERMS DISTRIBUTING. * + * * + * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-2000 * + * by Monty <monty@xiph.org> and The XIPHOPHORUS Company * + * http://www.xiph.org/ * + * * + ******************************************************************** + + function: LSP (also called LSF) conversion routines + last mod: $Id: lsp.c,v 1.10.2.1 2000/10/19 10:21:02 xiphmont Exp $ + + The LSP generation code is taken (with minimal modification) from + "On the Computation of the LSP Frequencies" by Joseph Rothweiler + <rothwlr@altavista.net>, available at: + + http://www2.xtdl.com/~rothwlr/lsfpaper/lsfpage.html + + ********************************************************************/ + +/* Note that the lpc-lsp conversion finds the roots of polynomial with + an iterative root polisher (CACM algorithm 283). It *is* possible + to confuse this algorithm into not converging; that should only + happen with absurdly closely spaced roots (very sharp peaks in the + LPC f response) which in turn should be impossible in our use of + the code. If this *does* happen anyway, it's a bug in the floor + finder; find the cause of the confusion (probably a single bin + spike or accidental near-float-limit resolution problems) and + correct it. */ + +#include <math.h> +#include <string.h> +#include <stdlib.h> +#include "lsp.h" +#include "os.h" +#include "misc.h" +#include "lookup.h" +#include "scales.h" + +/* three possible LSP to f curve functions; the exact computation + (float), a lookup based float implementation, and an integer + implementation. The float lookup is likely the optimal choice on + any machine with an FPU. The integer implementation is *not* fixed + point (due to the need for a large dynamic range and thus a + seperately tracked exponent) and thus much more complex than the + relatively simple float implementations. It's mostly for future + work on a fully fixed point implementation for processors like the + ARM family. */ + +/* undefine both for the 'old' but more precise implementation */ +#define FLOAT_LOOKUP +#undef INT_LOOKUP + +#ifdef FLOAT_LOOKUP +#include "lookup.c" /* catch this in the build system; we #include for + compilers (like gcc) that can't inline across + modules */ + +/* side effect: changes *lsp to cosines of lsp */ +void vorbis_lsp_to_curve(float *curve,int *map,int n,int ln,float *lsp,int m, + float amp,float ampoffset){ + int i; + float wdel=M_PI/ln; + vorbis_fpu_control fpu; + + vorbis_fpu_setround(&fpu); + for(i=0;i<m;i++)lsp[i]=vorbis_coslook(lsp[i]); + + i=0; + while(i<n){ + int j,k=map[i]; + int qexp; + float p=.7071067812; + float q=.7071067812; + float w=vorbis_coslook(wdel*k); + + for(j=0;j<m;j+=2) p *= lsp[j]-w; + for(j=1;j<m;j+=2) q *= lsp[j]-w; + + q=frexp(p*p*(1.+w)+q*q*(1.-w),&qexp); + q=vorbis_fromdBlook(amp* + vorbis_invsqlook(q)* + vorbis_invsq2explook(qexp+m)- + ampoffset); + + curve[i++]=q; + while(map[i]==k)curve[i++]=q; + } + vorbis_fpu_restore(fpu); +} + +#else + +#ifdef INT_LOOKUP +#include "lookup.c" /* catch this in the build system; we #include for + compilers (like gcc) that can't inline across + modules */ + +static int MLOOP_1[64]={ + 0,10,11,11, 12,12,12,12, 13,13,13,13, 13,13,13,13, + 14,14,14,14, 14,14,14,14, 14,14,14,14, 14,14,14,14, + 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, + 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, +}; + +static int MLOOP_2[64]={ + 0,4,5,5, 6,6,6,6, 7,7,7,7, 7,7,7,7, + 8,8,8,8, 8,8,8,8, 8,8,8,8, 8,8,8,8, + 9,9,9,9, 9,9,9,9, 9,9,9,9, 9,9,9,9, + 9,9,9,9, 9,9,9,9, 9,9,9,9, 9,9,9,9, +}; + +static int MLOOP_3[8]={0,1,2,2,3,3,3,3}; + + +/* side effect: changes *lsp to cosines of lsp */ +void vorbis_lsp_to_curve(float *curve,int *map,int n,int ln,float *lsp,int m, + float amp,float ampoffset){ + + /* 0 <= m < 256 */ + + /* set up for using all int later */ + int i; + int ampoffseti=rint(ampoffset*4096.); + int ampi=rint(amp*16.); + long *ilsp=alloca(m*sizeof(long)); + for(i=0;i<m;i++)ilsp[i]=vorbis_coslook_i(lsp[i]/M_PI*65536.+.5); + + i=0; + while(i<n){ + int j,k=map[i]; + unsigned long pi=46341; /* 2**-.5 in 0.16 */ + unsigned long qi=46341; + int qexp=0,shift; + long wi=vorbis_coslook_i(k*65536/ln); + + pi*=labs(ilsp[0]-wi); + qi*=labs(ilsp[1]-wi); + + for(j=2;j<m;j+=2){ + if(!(shift=MLOOP_1[(pi|qi)>>25])) + if(!(shift=MLOOP_2[(pi|qi)>>19])) + shift=MLOOP_3[(pi|qi)>>16]; + pi=(pi>>shift)*labs(ilsp[j]-wi); + qi=(qi>>shift)*labs(ilsp[j+1]-wi); + qexp+=shift; + } + if(!(shift=MLOOP_1[(pi|qi)>>25])) + if(!(shift=MLOOP_2[(pi|qi)>>19])) + shift=MLOOP_3[(pi|qi)>>16]; + pi>>=shift; + qi>>=shift; + qexp+=shift-7*m; + + /* pi,qi normalized collectively, both tracked using qexp */ + + /* p*=p(1-w), q*=q(1+w), let normalization drift because it isn't + worth tracking step by step */ + + pi=((pi*pi)>>16); + qi=((qi*qi)>>16); + qexp=qexp*2+m; + + qi*=(1<<14)-wi; + pi*=(1<<14)+wi; + + qi=(qi+pi)>>14; + + /* we've let the normalization drift because it wasn't important; + however, for the lookup, things must be normalized again. We + need at most one right shift or a number of left shifts */ + + if(qi&0xffff0000){ /* checks for 1.xxxxxxxxxxxxxxxx */ + qi>>=1; qexp++; + }else + while(qi && !(qi&0x8000)){ /* checks for 0.0xxxxxxxxxxxxxxx or less*/ + qi<<=1; qexp--; + } + + amp=vorbis_fromdBlook_i(ampi* /* n.4 */ + vorbis_invsqlook_i(qi,qexp)- + /* m.8, m+n<=8 */ + ampoffseti); /* 8.12[0] */ + + curve[i]=amp; + while(map[++i]==k)curve[i]=amp; + } +} + +#else + +/* old, nonoptimized but simple version for any poor sap who needs to + figure out what the hell this code does, or wants the other tiny + fraction of a dB precision */ + +/* side effect: changes *lsp to cosines of lsp */ +void vorbis_lsp_to_curve(float *curve,int *map,int n,int ln,float *lsp,int m, + float amp,float ampoffset){ + int i; + float wdel=M_PI/ln; + for(i=0;i<m;i++)lsp[i]=2*cos(lsp[i]); + + i=0; + while(i<n){ + int j,k=map[i]; + float p=.5; + float q=.5; + float w=2*cos(wdel*k); + for(j=0;j<m;j+=2){ + p *= w-lsp[j]; + q *= w-lsp[j+1]; + } + p*=p*(2.+w); + q*=q*(2.-w); + q=fromdB(amp/sqrt(p+q)-ampoffset); + + curve[i]=q; + while(map[++i]==k)curve[i]=q; + } +} + +#endif +#endif + +static void cheby(float *g, int ord) { + int i, j; + + g[0] *= 0.5; + for(i=2; i<= ord; i++) { + for(j=ord; j >= i; j--) { + g[j-2] -= g[j]; + g[j] += g[j]; + } + } +} + +static int comp(const void *a,const void *b){ + if(*(float *)a<*(float *)b) + return(1); + else + return(-1); +} + +/* This is one of those 'mathemeticians should not write code' kind of + cases. Newton's method of polishing roots is straightforward + enough... except in those cases where it just fails in the real + world. In our case below, we're worried about a local mini/maxima + shooting a root estimation off to infinity, or the new estimation + chaotically oscillating about convergence (shouldn't actually be a + problem in our usage. + + Maehly's modification (zero suppression, to prevent two tenative + roots from collapsing to the same actual root) similarly can + temporarily shoot a root off toward infinity. It would come + back... if it were not for the fact that machine representation has + limited dynamic range and resolution. This too is guarded by + limiting delta. + + Last problem is convergence criteria; we don't know what a 'double' + is on our hardware/compiler, and the convergence limit is bounded + by roundoff noise. So, we hack convergence: + + Require at most 1e-6 mean squared error for all zeroes. When + converging, start the clock ticking at 1e-6; limit our polishing to + as many more iterations as took us to get this far, 100 max. + + Past max iters, quit when MSE is no longer decreasing *or* we go + below ~1e-20 MSE, whichever happens first. */ + +static void Newton_Raphson_Maehly(float *a,int ord,float *r){ + int i, k, count=0, maxiter=0; + double error=1.,besterror=1.; + double *root=alloca(ord*sizeof(double)); + + for(i=0; i<ord;i++) root[i] = 2.0 * (i+0.5) / ord - 1.0; + + while(error>1.e-20){ + error=0; + + for(i=0; i<ord; i++) { /* Update each point. */ + double ac=0.,pp=0.,delta; + double rooti=root[i]; + double p=a[ord]; + for(k=ord-1; k>= 0; k--) { + + pp= pp* rooti + p; + p = p * rooti+ a[k]; + if (k != i) ac += 1./(rooti - root[k]); + } + ac=p*ac; + + delta = p/(pp-ac); + + /* don't allow the correction to scream off into infinity if we + happened to polish right at a local mini/maximum */ + + if(delta<-3)delta=-3; + if(delta>3.)delta=3.; /* 3 is not a random choice; it's large + enough to make sure the first pass + can't accidentally limit two poles to + the same value in a fatal nonelastic + collision. */ + + root[i] -= delta; + error += delta*delta; + } + + if(maxiter && count>maxiter && error>=besterror)break; + + /* anything to help out the polisher; converge using doubles */ + if(!count || error<besterror){ + for(i=0; i<ord; i++) r[i]=root[i]; + besterror=error; + if(error<1.e-6){ /* rough minimum criteria */ + maxiter=count*2+10; + if(maxiter>100)maxiter=100; + } + } + + count++; + } + + /* Replaced the original bubble sort with a real sort. With your + help, we can eliminate the bubble sort in our lifetime. --Monty */ + + qsort(r,ord,sizeof(float),comp); + +} + +/* Convert lpc coefficients to lsp coefficients */ +void vorbis_lpc_to_lsp(float *lpc,float *lsp,int m){ + int order2=m/2; + float *g1=alloca(sizeof(float)*(order2+1)); + float *g2=alloca(sizeof(float)*(order2+1)); + float *g1r=alloca(sizeof(float)*(order2+1)); + float *g2r=alloca(sizeof(float)*(order2+1)); + int i; + + /* Compute the lengths of the x polynomials. */ + /* Compute the first half of K & R F1 & F2 polynomials. */ + /* Compute half of the symmetric and antisymmetric polynomials. */ + /* Remove the roots at +1 and -1. */ + + g1[order2] = 1.0; + for(i=0;i<order2;i++) g1[order2-i-1] = lpc[i]+lpc[m-i-1]; + g2[order2] = 1.0; + for(i=0;i<order2;i++) g2[order2-i-1] = lpc[i]-lpc[m-i-1]; + + for(i=0; i<order2;i++) g1[order2-i-1] -= g1[order2-i]; + for(i=0; i<order2;i++) g2[order2-i-1] += g2[order2-i]; + + /* Convert into polynomials in cos(alpha) */ + cheby(g1,order2); + cheby(g2,order2); + + /* Find the roots of the 2 even polynomials.*/ + + Newton_Raphson_Maehly(g1,order2,g1r); + Newton_Raphson_Maehly(g2,order2,g2r); + + for(i=0;i<m;i+=2){ + lsp[i] = acos(g1r[i/2]); + lsp[i+1] = acos(g2r[i/2]); + } +} diff --git a/lib/os.h b/lib/os.h new file mode 100644 index 00000000..1b3cf6a0 --- /dev/null +++ b/lib/os.h @@ -0,0 +1,115 @@ +#ifndef _OS_H +#define _OS_H +/******************************************************************** + * * + * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY * + * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE. * + * PLEASE READ THESE TERMS DISTRIBUTING. * + * * + * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-2000 * + * by Monty <monty@xiph.org> and The XIPHOPHORUS Company * + * http://www.xiph.org/ * + * * + ******************************************************************** + + function: #ifdef jail to whip a few platforms into the UNIX ideal. + last mod: $Id: os.h,v 1.10.2.1 2000/10/19 10:21:02 xiphmont Exp $ + + ********************************************************************/ + +#include <math.h> +#include <ogg/os_types.h> + +#ifndef _V_IFDEFJAIL_H_ +#define _V_IFDEFJAIL_H_ + +#ifndef M_PI +#define M_PI (3.1415926539) +#endif + +#ifndef __GNUC__ +#ifdef _WIN32 +# include <malloc.h> +# define rint(x) (floor((x)+0.5)) +#endif +#endif + +#ifdef _WIN32 +# define FAST_HYPOT(a, b) sqrt((a)*(a) + (b)*(b)) +#else /* if not _WIN32 */ +# define FAST_HYPOT hypot +#endif + +#endif + +#ifdef HAVE_ALLOCA_H +#include <alloca.h> +#endif + +#ifdef USE_MEMORY_H +#include <memory.h> +#endif + +#ifndef min +# define min(x,y) ((x)>(y)?(y):(x)) +#endif + +#ifndef max +# define max(x,y) ((x)<(y)?(y):(x)) +#endif + + +#if defined(__i386__) && defined(__GNUC__) + +/* both GCC and MSVC are kinda stupid about rounding/casting to int. + Because of encapsulation constraints (GCC can't see inside the asm + block and so we end up doing stupid things like a store/load that + is collectively a noop), we do it this way */ + +/* we must set up the fpu before this works!! */ + +typedef ogg_int16_t vorbis_fpu_control; + +static inline void vorbis_fpu_setround(vorbis_fpu_control *fpu){ + ogg_int16_t ret; + ogg_int16_t temp; + __asm__ __volatile__("fnstcw %0\n\t" + "movw %0,%%dx\n\t" + "orw $62463,%%dx\n\t" + "movw %%dx,%1\n\t" + "fldcw %1\n\t":"=m"(ret):"m"(temp): "dx"); + *fpu=ret; +} + +static inline void vorbis_fpu_restore(vorbis_fpu_control fpu){ + __asm__ __volatile__("fldcw %0":: "m"(fpu)); +} + +/* assumes the FPU is in round mode! */ +static inline int vorbis_ftoi(double f){ /* yes, double! Otherwise, + we get extra fst/fld to + truncate precision */ + int i; + __asm__("fistl %0": "=m"(i) : "t"(f)); + return(i); +} + +#else + +static int vorbis_ftoi(double f){ + return (int)(f+.5); +} + + +typedef vorbis_fpu_control int; + +static inline void vorbis_fpu_setround(vorbis_fpu_control *fpu){ +} + +static inline void vorbis_fpu_restore(vorbis_fpu_control fpu){ +} + +#endif + +#endif /* _OS_H */ diff --git a/lib/vorbisfile.c b/lib/vorbisfile.c index 425ea433..3799786a 100644 --- a/lib/vorbisfile.c +++ b/lib/vorbisfile.c @@ -12,7 +12,7 @@ ******************************************************************** function: stdio-based convenience library for opening/seeking/decoding - last mod: $Id: vorbisfile.c,v 1.30.2.1 2000/10/14 03:14:07 xiphmont Exp $ + last mod: $Id: vorbisfile.c,v 1.30.2.2 2000/10/19 10:21:02 xiphmont Exp $ ********************************************************************/ @@ -1130,6 +1130,7 @@ long ov_read(OggVorbis_File *vf,char *buffer,int length, long channels=ov_info(vf,-1)->channels; long bytespersample=word * channels; + vorbis_fpu_control fpu; if(samples>length/bytespersample)samples=length/bytespersample; /* a tight loop to pack each size */ @@ -1137,63 +1138,79 @@ long ov_read(OggVorbis_File *vf,char *buffer,int length, int val; if(word==1){ int off=(sgned?0:128); + vorbis_fpu_setround(&fpu); for(j=0;j<samples;j++) for(i=0;i<channels;i++){ - val=(int)(pcm[i][j]*128. + 0.5); + val=vorbis_ftoi(pcm[i][j]*128.); if(val>127)val=127; else if(val<-128)val=-128; *buffer++=val+off; } + vorbis_fpu_restore(fpu); }else{ int off=(sgned?0:32768); if(host_endian==bigendianp){ if(sgned){ + + vorbis_fpu_setround(&fpu); for(i=0;i<channels;i++) { /* It's faster in this order */ float *src=pcm[i]; short *dest=((short *)buffer)+i; for(j=0;j<samples;j++) { - val=(int)(src[j]*32768. + 0.5); + val=vorbis_ftoi(src[j]*32768.); if(val>32767)val=32767; else if(val<-32768)val=-32768; *dest=val; dest+=channels; } } + vorbis_fpu_restore(fpu); + }else{ + + vorbis_fpu_setround(&fpu); for(i=0;i<channels;i++) { float *src=pcm[i]; short *dest=((short *)buffer)+i; for(j=0;j<samples;j++) { - val=(int)(src[j]*32768. + 0.5); + val=vorbis_ftoi(src[j]*32768.); if(val>32767)val=32767; else if(val<-32768)val=-32768; *dest=val+off; dest+=channels; } } + vorbis_fpu_restore(fpu); + } }else if(bigendianp){ + + vorbis_fpu_setround(&fpu); for(j=0;j<samples;j++) for(i=0;i<channels;i++){ - val=(int)(pcm[i][j]*32768. + 0.5); + val=vorbis_ftoi(pcm[i][j]*32768.); if(val>32767)val=32767; else if(val<-32768)val=-32768; val+=off; *buffer++=(val>>8); *buffer++=(val&0xff); } + vorbis_fpu_restore(fpu); + }else{ int val; + vorbis_fpu_setround(&fpu); for(j=0;j<samples;j++) for(i=0;i<channels;i++){ - val=(int)(pcm[i][j]*32768. + 0.5); + val=vorbis_ftoi(pcm[i][j]*32768.); if(val>32767)val=32767; else if(val<-32768)val=-32768; val+=off; *buffer++=(val&0xff); *buffer++=(val>>8); } + vorbis_fpu_restore(fpu); } } |