cast/rounding asm for vorbisfile float->int and lsp lookups

svn path=/branches/branch_beta3/vorbis/; revision=738
author: Monty <xiphmont@xiph.org> 2000-10-19 10:21:02 +0000
committer: Monty <xiphmont@xiph.org> 2000-10-19 10:21:02 +0000
commit: b074c6e416dcabef56ebbcce6ae4e5e1c12af088 (patch)
tree: 592cf3d976c66bd003ca9805d2d5940d0f572b4c
parent: a454e26f286b9b9503b10d092110e326a78cebc4 (diff)
download: libvorbis-git-b074c6e416dcabef56ebbcce6ae4e5e1c12af088.tar.gz
4 files changed, 601 insertions, 6 deletions
diff --git a/lib/lookup.c b/lib/lookup.c
new file mode 100644
index 00000000..5589a1ed
--- /dev/null
+++ b/lib/lookup.c
@@ -0,0 +1,94 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE.  *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
+ * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE.    *
+ * PLEASE READ THESE TERMS DISTRIBUTING.                            *
+ *                                                                  *
+ * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-2000             *
+ * by Monty <monty@xiph.org> and The XIPHOPHORUS Company            *
+ * http://www.xiph.org/                                             *
+ *                                                                  *
+ ********************************************************************
+
+  function: lookup based functions
+  last mod: $Id: lookup.c,v 1.2.2.1 2000/10/19 10:21:02 xiphmont Exp $
+
+ ********************************************************************/
+
+#include <math.h>
+#include "lookup.h"
+#include "lookup_data.h"
+#include "os.h"
+
+#ifdef FLOAT_LOOKUP
+
+/* cos(a) by linear interpolation in COS_LOOKUP; domain 0 to PI only */
+float vorbis_coslook(float a){
+  /* .31830989 ~= 1/PI: rescale the angle to a fractional table position */
+  double pos=a*(.31830989*(float)COS_LOOKUP_SZ);
+  int slot=vorbis_ftoi(pos-.5); /* -.5 offsets vorbis_ftoi's rounding */
+  return COS_LOOKUP[slot]+ (pos-slot)*(COS_LOOKUP[slot+1]-COS_LOOKUP[slot]);
+}
+
+/* 1./sqrt(a) for .5 <= a < 1., linearly interpolated from INVSQ_LOOKUP */
+float vorbis_invsqlook(float a){
+  double pos=a*(2.*(float)INVSQ_LOOKUP_SZ)-(float)INVSQ_LOOKUP_SZ; /* .5 maps to 0 */
+  int slot=vorbis_ftoi(pos-.5); /* -.5 offsets vorbis_ftoi's rounding */
+  return INVSQ_LOOKUP[slot]+ (pos-slot)*(INVSQ_LOOKUP[slot+1]-INVSQ_LOOKUP[slot]);
+}
+
+/* plain (uninterpolated) INVSQ2EXP_LOOKUP entry for exponent a, rebased by INVSQ2EXP_LOOKUP_MIN; presumably 1/sqrt(2^a) -- verify against lookup_data.h */
+float vorbis_invsq2explook(int a){
+  return INVSQ2EXP_LOOKUP[a-INVSQ2EXP_LOOKUP_MIN];
+}
+
+#include <stdio.h>
+/* table based fromdB (coarse table entry times fine table entry), domain -140dB to 0dB only */
+float vorbis_fromdBlook(float a){
+  int i=vorbis_ftoi(a*((float)(-(1<<FROMdB2_SHIFT)))-.5); /* -.5 offsets vorbis_ftoi's rounding, as in the other lookups */
+  if(i<0)return 1.;                                 /* a above 0dB */
+  if(i>=(FROMdB_LOOKUP_SZ<<FROMdB_SHIFT))return 0.; /* a past the quiet end of the table */
+  return FROMdB_LOOKUP[i>>FROMdB_SHIFT]*FROMdB2_LOOKUP[i&FROMdB2_MASK];
+}
+
+#endif
+
+#ifdef INT_LOOKUP
+/* interpolated 1./sqrt(p) where .5 <= a < 1. (.100000... to .111111...) in
+   16.16 format 
+
+   returns in m.8 format */
+long vorbis_invsqlook_i(long a,long e){
+  long i=(a&0x7fff)>>(INVSQ_LOOKUP_I_SHIFT-1); /* table index from the bits below bit 15 */
+  long d=(a&INVSQ_LOOKUP_I_MASK)<<(16-INVSQ_LOOKUP_I_SHIFT); /*  0.16 */
+  long val=INVSQ_LOOKUP_I[i]-                                /*  1.16 */
+    (((INVSQ_LOOKUP_I[i]-INVSQ_LOOKUP_I[i+1])*               /*  0.16 */
+      d)>>16);                                               /* result 1.16 */
+  /* fold the exponent e into the result: an odd e costs one factor of 1/sqrt(2) */
+  e+=32; /* presumably biases e non-negative before the parity test and shift -- verify */
+  if(e&1)val=(val*5792)>>13; /* multiply val by 1/sqrt(2) */
+  e=(e>>1)-8; /* halve the exponent; the -8 presumably rescales to the m.8 result -- verify */
+
+  return(val>>e);
+}
+
+/* table based fromdB, domain -140dB to 0dB only */
+/* a is dB in n.12 fixed point */
+float vorbis_fromdBlook_i(long a){
+  int step=(-a)>>(12-FROMdB2_SHIFT);
+  if(step<0)return 1.;
+  if(step>=(FROMdB_LOOKUP_SZ<<FROMdB_SHIFT))return 0.;
+  return FROMdB_LOOKUP[step>>FROMdB_SHIFT]*FROMdB2_LOOKUP[step&FROMdB2_MASK];
+}
+
+/* table based cos with linear interpolation, domain 0 to PI only */
+/* a is in 0.16 format, where 0==0, 2^^16-1==PI, return 0.14 */
+long vorbis_coslook_i(long a){
+  int slot=a>>COS_LOOKUP_I_SHIFT; /* table index */
+  int frac=a&COS_LOOKUP_I_MASK;   /* offset inside the slot */
+  return COS_LOOKUP_I[slot]-
+    ((frac*(COS_LOOKUP_I[slot]-COS_LOOKUP_I[slot+1]))>>COS_LOOKUP_I_SHIFT);
+}
+
+#endif
diff --git a/lib/lsp.c b/lib/lsp.c
new file mode 100644
index 00000000..d78be58e
--- /dev/null
+++ b/lib/lsp.c
@@ -0,0 +1,369 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE.  *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
+ * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE.    *
+ * PLEASE READ THESE TERMS DISTRIBUTING.                            *
+ *                                                                  *
+ * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-2000             *
+ * by Monty <monty@xiph.org> and The XIPHOPHORUS Company            *
+ * http://www.xiph.org/                                             *
+ *                                                                  *
+ ********************************************************************
+
+  function: LSP (also called LSF) conversion routines
+  last mod: $Id: lsp.c,v 1.10.2.1 2000/10/19 10:21:02 xiphmont Exp $
+
+  The LSP generation code is taken (with minimal modification) from
+  "On the Computation of the LSP Frequencies" by Joseph Rothweiler
+  <rothwlr@altavista.net>, available at:
+  
+  http://www2.xtdl.com/~rothwlr/lsfpaper/lsfpage.html 
+
+ ********************************************************************/
+
+/* Note that the lpc-lsp conversion finds the roots of polynomial with
+   an iterative root polisher (CACM algorithm 283).  It *is* possible
+   to confuse this algorithm into not converging; that should only
+   happen with absurdly closely spaced roots (very sharp peaks in the
+   LPC f response) which in turn should be impossible in our use of
+   the code.  If this *does* happen anyway, it's a bug in the floor
+   finder; find the cause of the confusion (probably a single bin
+   spike or accidental near-float-limit resolution problems) and
+   correct it. */
+
+#include <math.h>
+#include <string.h>
+#include <stdlib.h>
+#include "lsp.h"
+#include "os.h"
+#include "misc.h"
+#include "lookup.h"
+#include "scales.h"
+
+/* three possible LSP to f curve functions; the exact computation
+   (float), a lookup based float implementation, and an integer
+   implementation.  The float lookup is likely the optimal choice on
+   any machine with an FPU.  The integer implementation is *not* fixed
+   point (due to the need for a large dynamic range and thus a
+   separately tracked exponent) and thus much more complex than the
+   relatively simple float implementations. It's mostly for future
+   work on a fully fixed point implementation for processors like the
+   ARM family. */
+
+/* undefine both for the 'old' but more precise implementation */
+#define  FLOAT_LOOKUP
+#undef   INT_LOOKUP
+
+#ifdef FLOAT_LOOKUP
+#include "lookup.c" /* catch this in the build system; we #include for
+                       compilers (like gcc) that can't inline across
+                       modules */
+
+/* fill curve[0..n-1] from m LSP angles via the float lookups; side effect: changes *lsp to cosines of lsp */
+void vorbis_lsp_to_curve(float *curve,int *map,int n,int ln,float *lsp,int m,
+			    float amp,float ampoffset){
+  int i;
+  float wdel=M_PI/ln; /* angle per unit of map[] value */
+  vorbis_fpu_control fpu;
+  /* vorbis_ftoi (used inside the lookups) depends on the FPU rounding mode; restored at exit */
+  vorbis_fpu_setround(&fpu);
+  for(i=0;i<m;i++)lsp[i]=vorbis_coslook(lsp[i]); /* in place: lsp[] now holds cosines */
+
+  i=0;
+  while(i<n){
+    int j,k=map[i]; /* k: map value shared by a run of consecutive outputs */
+    int qexp;
+    float p=.7071067812;
+    float q=.7071067812;
+    float w=vorbis_coslook(wdel*k);
+
+    for(j=0;j<m;j+=2)    p *= lsp[j]-w; /* product over even-indexed LSPs */
+    for(j=1;j<m;j+=2)    q *= lsp[j]-w; /* product over odd-indexed LSPs */
+
+    q=frexp(p*p*(1.+w)+q*q*(1.-w),&qexp); /* q: mantissa, qexp: binary exponent */
+    q=vorbis_fromdBlook(amp*             
+			vorbis_invsqlook(q)*
+			vorbis_invsq2explook(qexp+m)- 
+			ampoffset);
+
+    curve[i++]=q;
+    while(map[i]==k)curve[i++]=q; /* NOTE(review): no i<n test here; presumably map[] ends with an entry that differs from the last k -- confirm */
+  }
+  vorbis_fpu_restore(fpu);
+}
+
+#else
+
+#ifdef INT_LOOKUP
+#include "lookup.c" /* catch this in the build system; we #include for
+                       compilers (like gcc) that can't inline across
+                       modules */
+
+static int MLOOP_1[64]={ /* by (pi|qi)>>25: right shift that brings the top set bit down to bit 15; 0 = try MLOOP_2 */
+   0,10,11,11, 12,12,12,12, 13,13,13,13, 13,13,13,13,
+  14,14,14,14, 14,14,14,14, 14,14,14,14, 14,14,14,14,
+  15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+  15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+};
+
+static int MLOOP_2[64]={ /* by (pi|qi)>>19: same idea for smaller values; 0 = try MLOOP_3 */
+  0,4,5,5, 6,6,6,6, 7,7,7,7, 7,7,7,7,
+  8,8,8,8, 8,8,8,8, 8,8,8,8, 8,8,8,8,
+  9,9,9,9, 9,9,9,9, 9,9,9,9, 9,9,9,9,
+  9,9,9,9, 9,9,9,9, 9,9,9,9, 9,9,9,9,
+};
+
+static int MLOOP_3[8]={0,1,2,2,3,3,3,3}; /* by (pi|qi)>>16: last stage; 0 = no shift needed */
+
+
+/* integer variant; unlike the float variants it leaves lsp[] untouched (the cosines go into the local ilsp[]) */
+void vorbis_lsp_to_curve(float *curve,int *map,int n,int ln,float *lsp,int m,
+			    float amp,float ampoffset){
+
+  /* 0 <= m < 256 */
+
+  /* set up for using all int later */
+  int i;
+  int ampoffseti=rint(ampoffset*4096.); /* 12 fractional bits */
+  int ampi=rint(amp*16.);               /* 4 fractional bits */
+  long *ilsp=alloca(m*sizeof(long));
+  for(i=0;i<m;i++)ilsp[i]=vorbis_coslook_i(lsp[i]/M_PI*65536.+.5);
+
+  i=0;
+  while(i<n){
+    int j,k=map[i];
+    unsigned long pi=46341; /* 2**-.5 in 0.16 */
+    unsigned long qi=46341;
+    int qexp=0,shift;
+    long wi=vorbis_coslook_i(k*65536/ln);
+
+    pi*=labs(ilsp[0]-wi); /* NOTE(review): ilsp[0] and ilsp[1] are read unconditionally; assumes m>=2 */
+    qi*=labs(ilsp[1]-wi);
+
+    for(j=2;j<m;j+=2){ /* NOTE(review): ilsp[j+1] below assumes m is even */
+      if(!(shift=MLOOP_1[(pi|qi)>>25]))
+	if(!(shift=MLOOP_2[(pi|qi)>>19]))
+	  shift=MLOOP_3[(pi|qi)>>16];
+      pi=(pi>>shift)*labs(ilsp[j]-wi);
+      qi=(qi>>shift)*labs(ilsp[j+1]-wi);
+      qexp+=shift;
+    }
+    if(!(shift=MLOOP_1[(pi|qi)>>25]))
+      if(!(shift=MLOOP_2[(pi|qi)>>19]))
+	shift=MLOOP_3[(pi|qi)>>16];
+    pi>>=shift;
+    qi>>=shift;
+    qexp+=shift-7*m;
+
+    /* pi,qi normalized collectively, both tracked using qexp */
+
+    /* p=p*p*(1+w), q=q*q*(1-w), let normalization drift because it isn't
+       worth tracking step by step */
+
+    pi=((pi*pi)>>16);
+    qi=((qi*qi)>>16);
+    qexp=qexp*2+m;
+
+    qi*=(1<<14)-wi; /* 1<<14 is 1.0 in the 0.14 format vorbis_coslook_i returns */
+    pi*=(1<<14)+wi;
+    
+    qi=(qi+pi)>>14;
+
+    /* we've let the normalization drift because it wasn't important;
+       however, for the lookup, things must be normalized again.  We
+       need at most one right shift or a number of left shifts */
+
+    if(qi&0xffff0000){ /* checks for 1.xxxxxxxxxxxxxxxx */
+      qi>>=1; qexp++; 
+    }else
+      while(qi && !(qi&0x8000)){ /* checks for 0.0xxxxxxxxxxxxxxx or less*/
+	qi<<=1; qexp--; 
+      }
+
+    amp=vorbis_fromdBlook_i(ampi*                     /*  n.4         */
+			    vorbis_invsqlook_i(qi,qexp)- 
+			                              /*  m.8, m+n<=8 */
+			    ampoffseti);              /*  8.12[0]     */
+
+    curve[i]=amp; /* amp is reused as scratch here; the scaling itself uses ampi computed above */
+    while(map[++i]==k)curve[i]=amp; /* NOTE(review): no i<n test; presumably map[] has a terminating entry -- confirm */
+  }
+}
+
+#else 
+
+/* old, nonoptimized but simple version for any poor sap who needs to
+   figure out what the hell this code does, or wants the other tiny
+   fraction of a dB precision */
+
+/* reference variant using libm directly; side effect: changes *lsp to 2*cos(lsp) */
+void vorbis_lsp_to_curve(float *curve,int *map,int n,int ln,float *lsp,int m,
+			    float amp,float ampoffset){
+  int i;
+  float wdel=M_PI/ln; /* angle per unit of map[] value */
+  for(i=0;i<m;i++)lsp[i]=2*cos(lsp[i]);
+
+  i=0;
+  while(i<n){
+    int j,k=map[i]; /* k: map value shared by a run of consecutive outputs */
+    float p=.5;
+    float q=.5;
+    float w=2*cos(wdel*k);
+    for(j=0;j<m;j+=2){ /* NOTE(review): lsp[j+1] assumes m is even */
+      p *= w-lsp[j];
+      q *= w-lsp[j+1];
+    }
+    p*=p*(2.+w); /* p becomes p*p*(2+w) */
+    q*=q*(2.-w); /* q becomes q*q*(2-w) */
+    q=fromdB(amp/sqrt(p+q)-ampoffset);
+
+    curve[i]=q;
+    while(map[++i]==k)curve[i]=q; /* NOTE(review): no i<n test; presumably map[] has a terminating entry -- confirm */
+  }
+}
+
+#endif
+#endif
+
+static void cheby(float *g, int ord) {
+  int pass, idx;
+  /* in-place rewrite of g[0..ord]; the caller uses it to get polynomials in cos(alpha) */
+  g[0] *= 0.5;
+  for(pass=2; pass<=ord; pass++) {
+    for(idx=ord; idx>=pass; idx--) {
+      g[idx-2] -= g[idx];
+      g[idx] += g[idx]; /* double the coefficient */
+    }
+  }
+}
+
+/* qsort comparator for floats: descending order, 0 when neither value is larger */
+static int comp(const void *a,const void *b){
+  const float x=*(const float *)a;
+  const float y=*(const float *)b;
+  return (x<y)-(x>y); /* 1 if a<b, -1 if a>b, else 0, so comp(a,b) and comp(b,a) always agree */
+}
+
+/* This is one of those 'mathematicians should not write code' kind of
+   cases.  Newton's method of polishing roots is straightforward
+   enough... except in those cases where it just fails in the real
+   world.  In our case below, we're worried about a local mini/maxima
+   shooting a root estimation off to infinity, or the new estimation
+   chaotically oscillating about convergence (shouldn't actually be a
+   problem in our usage).
+
+   Maehly's modification (zero suppression, to prevent two tentative
+   roots from collapsing to the same actual root) similarly can
+   temporarily shoot a root off toward infinity.  It would come
+   back... if it were not for the fact that machine representation has
+   limited dynamic range and resolution.  This too is guarded by
+   limiting delta.
+
+   Last problem is convergence criteria; we don't know what a 'double'
+   is on our hardware/compiler, and the convergence limit is bounded
+   by roundoff noise.  So, we hack convergence:
+
+   Require at most 1e-6 mean squared error for all zeroes.  When
+   converging, start the clock ticking at 1e-6; limit our polishing to
+   as many more iterations as took us to get this far, 100 max.
+
+   Past max iters, quit when MSE is no longer decreasing *or* we go
+   below ~1e-20 MSE, whichever happens first. */
+/* a[0..ord]: polynomial coefficients (a[ord] leading); r[0..ord-1]: receives the polished roots, sorted descending */
+static void Newton_Raphson_Maehly(float *a,int ord,float *r){
+  int i, k, count=0, maxiter=0;
+  double error=1.,besterror=1.;
+  double *root=alloca(ord*sizeof(double)); /* working estimates, kept in double */
+  /* initial guesses: evenly spaced strictly inside (-1,1) */
+  for(i=0; i<ord;i++) root[i] = 2.0 * (i+0.5) / ord - 1.0;
+  
+  while(error>1.e-20){
+    error=0; /* accumulates the sum of squared corrections for this sweep */
+    
+    for(i=0; i<ord; i++) { /* Update each point. */
+      double ac=0.,pp=0.,delta;
+      double rooti=root[i];
+      double p=a[ord];
+      for(k=ord-1; k>= 0; k--) {
+	/* Horner: p ends as the polynomial at rooti, pp as its derivative */
+	pp= pp* rooti + p;
+	p = p * rooti+ a[k];
+	if (k != i) ac += 1./(rooti - root[k]); /* Maehly term: sum over the other current estimates */
+      }
+      ac=p*ac;
+
+      delta = p/(pp-ac); /* Newton step with the Maehly term subtracted from the derivative */
+
+      /* don't allow the correction to scream off into infinity if we
+         happened to polish right at a local mini/maximum */
+
+      if(delta<-3)delta=-3;
+      if(delta>3.)delta=3.; /* 3 is not a random choice; it's large
+                               enough to make sure the first pass
+                               can't accidentally limit two poles to
+                               the same value in a fatal nonelastic
+                               collision.  */
+
+      root[i] -= delta;
+      error += delta*delta;
+    }
+    
+    if(maxiter && count>maxiter && error>=besterror)break; /* budget spent and no longer improving */
+
+    /* anything to help out the polisher; converge using doubles */
+    if(!count || error<besterror){
+      for(i=0; i<ord; i++) r[i]=root[i]; /* publish the best estimate so far (narrowed to float) */
+      besterror=error;
+      if(error<1.e-6){ /* rough minimum criteria */
+	maxiter=count*2+10;
+	if(maxiter>100)maxiter=100;
+      }
+    }
+
+    count++;
+  }
+
+  /* Replaced the original bubble sort with a real sort.  With your
+     help, we can eliminate the bubble sort in our lifetime. --Monty */
+  
+  qsort(r,ord,sizeof(float),comp);
+
+}
+
+/* Convert m lpc coefficients to m lsp values (lsp[] receives acos of the roots found); scratch space comes from alloca */
+void vorbis_lpc_to_lsp(float *lpc,float *lsp,int m){
+  int order2=m/2;
+  float *g1=alloca(sizeof(float)*(order2+1));  /* coefficients of the sum polynomial */
+  float *g2=alloca(sizeof(float)*(order2+1));  /* coefficients of the difference polynomial */
+  float *g1r=alloca(sizeof(float)*(order2+1)); /* roots found for g1 */
+  float *g2r=alloca(sizeof(float)*(order2+1)); /* roots found for g2 */
+  int i;
+
+  /* Compute the lengths of the x polynomials. */
+  /* Compute the first half of K & R F1 & F2 polynomials. */
+  /* Compute half of the symmetric and antisymmetric polynomials. */
+  /* Remove the roots at +1 and -1. */
+  
+  g1[order2] = 1.0;
+  for(i=0;i<order2;i++) g1[order2-i-1] = lpc[i]+lpc[m-i-1];
+  g2[order2] = 1.0;
+  for(i=0;i<order2;i++) g2[order2-i-1] = lpc[i]-lpc[m-i-1];
+  
+  for(i=0; i<order2;i++) g1[order2-i-1] -= g1[order2-i];
+  for(i=0; i<order2;i++) g2[order2-i-1] += g2[order2-i];
+
+  /* Convert into polynomials in cos(alpha) */
+  cheby(g1,order2);
+  cheby(g2,order2);
+
+  /* Find the roots of the 2 even polynomials.*/
+  
+  Newton_Raphson_Maehly(g1,order2,g1r);
+  Newton_Raphson_Maehly(g2,order2,g2r);
+  /* interleave: even slots from g1's roots, odd slots from g2's */
+  for(i=0;i<m;i+=2){ /* NOTE(review): with odd m the last pass writes lsp[m] and reads g2r[order2], which no root was stored in; presumably m is always even -- confirm */
+    lsp[i] = acos(g1r[i/2]);
+    lsp[i+1] = acos(g2r[i/2]);
+  }
+}
diff --git a/lib/os.h b/lib/os.h
new file mode 100644
index 00000000..1b3cf6a0
--- /dev/null
+++ b/lib/os.h
@@ -0,0 +1,115 @@
+#ifndef _OS_H
+#define _OS_H
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE.  *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
+ * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE.    *
+ * PLEASE READ THESE TERMS DISTRIBUTING.                            *
+ *                                                                  *
+ * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-2000             *
+ * by Monty <monty@xiph.org> and The XIPHOPHORUS Company            *
+ * http://www.xiph.org/                                             *
+ *                                                                  *
+ ********************************************************************
+
+ function: #ifdef jail to whip a few platforms into the UNIX ideal.
+ last mod: $Id: os.h,v 1.10.2.1 2000/10/19 10:21:02 xiphmont Exp $
+
+ ********************************************************************/
+
+#include <math.h>
+#include <ogg/os_types.h>
+
+#ifndef _V_IFDEFJAIL_H_
+#define _V_IFDEFJAIL_H_
+
+#ifndef M_PI
+#define M_PI (3.14159265358979323846) /* fallback for math.h headers that omit M_PI; pi to full double precision */
+#endif
+
+#ifndef __GNUC__
+#ifdef _WIN32
+#  include <malloc.h> /* presumably where this toolchain declares alloca -- verify */
+#  define rint(x)   (floor((x)+0.5)) /* stand-in for rint(): halves always round up, whatever the FPU rounding mode */
+#endif
+#endif
+
+#ifdef _WIN32
+#  define FAST_HYPOT(a, b) sqrt((a)*(a) + (b)*(b)) /* evaluates a and b twice each: no side effects in arguments */
+#else /* if not _WIN32 */
+#  define FAST_HYPOT hypot
+#endif
+
+#endif
+
+#ifdef HAVE_ALLOCA_H
+#include <alloca.h>
+#endif
+
+#ifdef USE_MEMORY_H
+#include <memory.h>
+#endif
+
+#ifndef min
+#  define min(x,y)  ((x)>(y)?(y):(x)) /* smaller of x and y; the selected argument is evaluated twice */
+#endif
+
+#ifndef max
+#  define max(x,y)  ((x)<(y)?(y):(x)) /* larger of x and y; the selected argument is evaluated twice */
+#endif
+
+
+#if defined(__i386__) && defined(__GNUC__)
+
+/* both GCC and MSVC are kinda stupid about rounding/casting to int.
+   Because of encapsulation constraints (GCC can't see inside the asm
+   block and so we end up doing stupid things like a store/load that
+   is collectively a noop), we do it this way */
+
+/* we must set up the fpu before this works!! */
+
+typedef ogg_int16_t vorbis_fpu_control; /* holds a saved x87 control word */
+
+static inline void vorbis_fpu_setround(vorbis_fpu_control *fpu){ /* save the control word in *fpu, then select round-to-nearest */
+  ogg_int16_t ret;  /* control word as found on entry */
+  ogg_int16_t temp; /* memory slot for the modified word: fldcw only loads from memory */
+  __asm__ __volatile__("fnstcw %0\n\t"
+	  "movw %0,%%dx\n\t"
+	  "andw $62463,%%dx\n\t" /* 62463 == 0xF3FF: clears rounding-control bits 10-11 (00 = nearest), keeps every other bit */
+	  "movw %%dx,%1\n\t"
+	  "fldcw %1\n\t":"=m"(ret),"=m"(temp):: "dx"); /* temp is written by the asm, so it is an output operand */
+  *fpu=ret;
+}
+
+static inline void vorbis_fpu_restore(vorbis_fpu_control fpu){ /* fpu: the value vorbis_fpu_setround() stored */
+  __asm__ __volatile__("fldcw %0":: "m"(fpu)); /* reload the x87 control word from fpu */
+}
+
+/* double -> int using the x87's current rounding mode; callers bracket use with vorbis_fpu_setround()/vorbis_fpu_restore() */
+static inline int vorbis_ftoi(double f){  /* yes, double!  Otherwise,
+                                             we get extra fst/fld to
+                                             truncate precision */
+  int i;
+  __asm__("fistl %0": "=m"(i) : "t"(f)); /* "t" puts f on top of the x87 stack; fistl stores it to i as a 32-bit integer */
+  return(i);
+}
+
+#else
+
+static int vorbis_ftoi(double f){ /* portable fallback: round f to the nearest int */
+  return (int)floor(f+.5); /* floor is required: a bare cast truncates toward zero and misrounds negatives (-1.6 would give -1) */
+}
+
+
+typedef int vorbis_fpu_control; /* placeholder type: no FPU state is saved on this path */
+
+static inline void vorbis_fpu_setround(vorbis_fpu_control *fpu){ /* no-op on this path; note *fpu is left unwritten */
+}
+
+static inline void vorbis_fpu_restore(vorbis_fpu_control fpu){ /* no-op on this path; fpu is ignored */
+}
+
+#endif
+
+#endif /* _OS_H */
diff --git a/lib/vorbisfile.c b/lib/vorbisfile.c
index 425ea433..3799786a 100644
--- a/lib/vorbisfile.c
+++ b/lib/vorbisfile.c
@@ -12,7 +12,7 @@
  ********************************************************************
 
  function: stdio-based convenience library for opening/seeking/decoding
- last mod: $Id: vorbisfile.c,v 1.30.2.1 2000/10/14 03:14:07 xiphmont Exp $
+ last mod: $Id: vorbisfile.c,v 1.30.2.2 2000/10/19 10:21:02 xiphmont Exp $
 
  ********************************************************************/
 
@@ -1130,6 +1130,7 @@ long ov_read(OggVorbis_File *vf,char *buffer,int length,
 
 	long channels=ov_info(vf,-1)->channels;
 	long bytespersample=word * channels;
+	vorbis_fpu_control fpu;
 	if(samples>length/bytespersample)samples=length/bytespersample;
 	
 	/* a tight loop to pack each size */
@@ -1137,63 +1138,79 @@ long ov_read(OggVorbis_File *vf,char *buffer,int length,
 	  int val;
 	  if(word==1){
 	    int off=(sgned?0:128);
+	    vorbis_fpu_setround(&fpu);
 	    for(j=0;j<samples;j++)
 	      for(i=0;i<channels;i++){
-		val=(int)(pcm[i][j]*128. + 0.5);
+		val=vorbis_ftoi(pcm[i][j]*128.);
 		if(val>127)val=127;
 		else if(val<-128)val=-128;
 		*buffer++=val+off;
 	      }
+	    vorbis_fpu_restore(fpu);
 	  }else{
 	    int off=(sgned?0:32768);
 
 	    if(host_endian==bigendianp){
 	      if(sgned){
+
+		vorbis_fpu_setround(&fpu);
 		for(i=0;i<channels;i++) { /* It's faster in this order */
 		  float *src=pcm[i];
 		  short *dest=((short *)buffer)+i;
 		  for(j=0;j<samples;j++) {
-		    val=(int)(src[j]*32768. + 0.5);
+		    val=vorbis_ftoi(src[j]*32768.);
 		    if(val>32767)val=32767;
 		    else if(val<-32768)val=-32768;
 		    *dest=val;
 		    dest+=channels;
 		  }
 		}
+		vorbis_fpu_restore(fpu);
+
 	      }else{
+
+		vorbis_fpu_setround(&fpu);
 		for(i=0;i<channels;i++) {
 		  float *src=pcm[i];
 		  short *dest=((short *)buffer)+i;
 		  for(j=0;j<samples;j++) {
-		    val=(int)(src[j]*32768. + 0.5);
+		    val=vorbis_ftoi(src[j]*32768.);
 		    if(val>32767)val=32767;
 		    else if(val<-32768)val=-32768;
 		    *dest=val+off;
 		    dest+=channels;
 		  }
 		}
+		vorbis_fpu_restore(fpu);
+
 	      }
 	    }else if(bigendianp){
+
+	      vorbis_fpu_setround(&fpu);
 	      for(j=0;j<samples;j++)
 		for(i=0;i<channels;i++){
-		  val=(int)(pcm[i][j]*32768. + 0.5);
+		  val=vorbis_ftoi(pcm[i][j]*32768.);
 		  if(val>32767)val=32767;
 		  else if(val<-32768)val=-32768;
 		  val+=off;
 		  *buffer++=(val>>8);
 		  *buffer++=(val&0xff);
 		}
+	      vorbis_fpu_restore(fpu);
+
 	    }else{
 	      int val;
+	      vorbis_fpu_setround(&fpu);
 	      for(j=0;j<samples;j++)
 	 	for(i=0;i<channels;i++){
-		  val=(int)(pcm[i][j]*32768. + 0.5);
+		  val=vorbis_ftoi(pcm[i][j]*32768.);
 		  if(val>32767)val=32767;
 		  else if(val<-32768)val=-32768;
 		  val+=off;
 		  *buffer++=(val&0xff);
 		  *buffer++=(val>>8);
 	  	}
+	      vorbis_fpu_restore(fpu);  
 
 	    }
 	  }
author	Monty <xiphmont@xiph.org>	2000-10-19 10:21:02 +0000
committer	Monty <xiphmont@xiph.org>	2000-10-19 10:21:02 +0000
commit	b074c6e416dcabef56ebbcce6ae4e5e1c12af088 (patch)
tree	592cf3d976c66bd003ca9805d2d5940d0f572b4c
parent	a454e26f286b9b9503b10d092110e326a78cebc4 (diff)
download	libvorbis-git-b074c6e416dcabef56ebbcce6ae4e5e1c12af088.tar.gz